framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,window_size,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,128,1,fp8,fp8,0,4.874650637308757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,128,1,float16,float16,0,5.331407864888509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,128,1,float16,fp8,0,5.372186660766602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,128,1,float16,float16,0,5.397061030069987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,0,1,float16,float16,0,34.89833068847656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,0,1,fp8,fp8,0,29.261136372884113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,128,1,fp8,fp8,0,4.954607963562012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,0,1,float16,fp8,0,33.64184061686198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,128,1,float16,fp8,0,5.4363149007161455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,0,1,float16,float16,0,32.54792022705078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,128,1,float16,float16,0,5.430805206298828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,128,1,float16,fp8,0,5.471850713094075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,128,1,fp8,fp8,0,4.9928693771362305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,0,1,fp8,fp8,0,29.343488057454426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,0,1,float16,fp8,0,35.747301737467446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,0,1,float16,float16,0,32.877787272135414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,128,1,float16,float16,0,3.135509490966797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,128,1,float16,fp8,0,3.20362122853597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,128,1,fp8,fp8,0,2.98529052734375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,0,1,float16,fp8,0,35.14233144124349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,0,1,fp8,fp8,0,29.379168192545574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,0,1,float16,float16,0,17.48529561360677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,128,1,float16,float16,0,2.781989415486654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,128,1,float16,fp8,0,2.803599993387858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,0,1,float16,fp8,0,19.00814437866211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,128,1,fp8,fp8,0,2.5455733935038247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,0,1,fp8,fp8,0,15.238154093424479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,128,1,float16,float16,0,2.7875731786092124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,0,1,float16,float16,0,16.327882130940754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,128,1,float16,fp8,0,2.8113972345987954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,128,1,fp8,fp8,0,2.559600035349528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,0,1,fp8,fp8,0,14.786293029785156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,0,1,float16,fp8,0,17.27120590209961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,128,1,float16,float16,0,2.8045867284139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,0,1,float16,float16,0,16.601605733235676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,128,1,float16,fp8,0,2.8294827143351235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,0,1,fp8,fp8,0,14.800314585367838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,128,1,fp8,fp8,0,2.5786026318868003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,0,1,float16,fp8,0,16.076709747314453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,128,1,float16,float16,0,1.7065013249715169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,0,1,float16,float16,0,17.959082285563152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,128,1,float16,fp8,0,1.7493866284688313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,0,1,float16,float16,0,8.45962651570638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,128,1,fp8,fp8,0,1.6431999206542969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,0,1,fp8,fp8,0,14.820789337158203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,128,1,float16,float16,0,1.5420907338460286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,0,1,float16,fp8,0,16.472272237141926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,128,1,float16,fp8,0,1.5534399350484211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,128,1,fp8,fp8,0,1.428666591644287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,0,1,float16,fp8,0,8.483711878458658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,0,1,fp8,fp8,0,7.817951838175456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,128,1,float16,float16,0,1.5482880274454753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,0,1,float16,float16,0,8.450458526611328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,128,1,float16,fp8,0,1.558634599049886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,0,1,fp8,fp8,0,7.598880132039388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,0,1,float16,fp8,0,8.18820826212565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,128,1,fp8,fp8,0,1.435205300649007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,128,1,float16,float16,0,1.5540746053059895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,0,1,float16,float16,0,8.715967814127604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,128,1,float16,fp8,0,1.5684107144673665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,128,1,fp8,fp8,0,1.4459360440572102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,0,1,fp8,fp8,0,7.603343963623047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,0,1,float16,fp8,0,8.219584147135416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,128,1,float16,float16,0,1.1838826338450115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,0,1,float16,float16,0,8.32207997639974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,128,1,float16,fp8,0,1.1845653057098389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,128,1,fp8,fp8,0,1.1076853275299072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,0,1,float16,float16,0,4.587178548177083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,0,1,fp8,fp8,0,7.613578796386719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,0,1,float16,fp8,0,8.47763188680013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,128,1,float16,float16,0,1.1862880388895671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,128,1,float16,fp8,0,1.186362663904826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,0,1,float16,fp8,0,4.590688069661458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,0,1,fp8,fp8,0,4.248160044352214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,128,1,fp8,fp8,0,1.1113813718159993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,0,1,float16,float16,0,4.641589482625325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,128,1,float16,float16,0,1.1853066285451253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,128,1,float16,fp8,0,1.1847253640492756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,0,1,fp8,fp8,0,4.244661331176758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,0,1,float16,fp8,0,4.553536097208659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,128,1,fp8,fp8,0,1.107589324315389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,0,1,float16,float16,0,4.559904098510742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,128,1,float16,float16,0,1.1814560095469158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,128,1,float16,fp8,0,1.1833919684092205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,0,1,fp8,fp8,0,4.244661331176758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,0,1,float16,fp8,0,4.5589494705200195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,128,1,fp8,fp8,0,1.108016014099121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,0,1,float16,float16,0,4.743776003519694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,0,1,fp8,fp8,0,4.24510924021403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,0,1,float16,fp8,0,4.56606388092041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,128,1,float16,float16,0,3.9757919311523438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,128,1,fp8,fp8,0,3.6162293752034507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,128,1,float16,fp8,0,4.006101290384929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,128,1,float16,float16,0,3.992272059122721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,0,1,float16,float16,0,20.53289540608724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,0,1,float16,fp8,0,19.77398427327474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,0,1,fp8,fp8,0,17.225541432698567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,128,1,float16,fp8,0,4.027584075927734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,128,1,fp8,fp8,0,3.661535898844401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,0,1,float16,float16,0,18.63904571533203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,128,1,float16,float16,0,4.019136110941569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,128,1,float16,fp8,0,4.052725474039714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,0,1,fp8,fp8,0,17.270511627197266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,0,1,float16,fp8,0,20.766639709472656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,128,1,fp8,fp8,0,3.6913439432779946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,128,1,float16,float16,0,2.3459572792053223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,0,1,float16,float16,0,20.36678949991862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,128,1,float16,fp8,0,2.4005600611368814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,128,1,fp8,fp8,0,2.234224001566569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,0,1,float16,float16,0,9.81888516743978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,0,1,fp8,fp8,0,17.303855895996094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,128,1,float16,float16,0,2.0860160191853843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,0,1,float16,fp8,0,21.450111389160156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,128,1,float16,fp8,0,2.1034933725992837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,0,1,float16,fp8,0,10.055440266927084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,0,1,fp8,fp8,0,9.088373184204102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,128,1,fp8,fp8,0,1.914474646250407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,128,1,float16,float16,0,2.095322608947754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,0,1,float16,float16,0,10.00107192993164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,128,1,float16,fp8,0,2.110149383544922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,0,1,fp8,fp8,0,8.755589167277018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,0,1,float16,fp8,0,9.963823954264322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,128,1,fp8,fp8,0,1.9232373237609863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,128,1,float16,float16,0,2.102831999460856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,0,1,float16,float16,0,9.989738464355469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,128,1,float16,fp8,0,2.1233439445495605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,0,1,fp8,fp8,0,8.766789118448893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,128,1,fp8,fp8,0,1.939077377319336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,0,1,float16,fp8,0,9.476666768391928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,128,1,float16,float16,0,1.2834773063659668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,0,1,float16,float16,0,9.785322825113932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,128,1,float16,fp8,0,1.3149226506551106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,0,1,float16,float16,0,5.042799949645996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,128,1,fp8,fp8,0,1.2376320362091064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,0,1,fp8,fp8,0,8.78056526184082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,0,1,float16,fp8,0,9.70358403523763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,128,1,float16,float16,0,1.1608373324076335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,128,1,float16,fp8,0,1.1693387031555176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,0,1,float16,fp8,0,5.074895858764648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,128,1,fp8,fp8,0,1.0770560105641682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,0,1,fp8,fp8,0,4.699354807535808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,128,1,float16,float16,0,1.1661972999572754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,0,1,float16,float16,0,4.923061370849609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,128,1,float16,fp8,0,1.1757973035176594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,0,1,float16,fp8,0,4.8836212158203125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,0,1,fp8,fp8,0,4.53331724802653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,128,1,fp8,fp8,0,1.0834773381551106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,128,1,float16,float16,0,1.171445369720459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,0,1,float16,float16,0,4.914117177327474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,128,1,float16,fp8,0,1.1814186573028564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,0,1,fp8,fp8,0,4.5412906010945635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,128,1,fp8,fp8,0,1.0902026494344075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,0,1,float16,fp8,0,4.9004160563151045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,128,1,float16,float16,0,0.8967626889546713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,0,1,float16,float16,0,4.900320053100586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,128,1,float16,fp8,0,0.89955735206604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,0,1,float16,float16,0,2.8148533503214517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,128,1,fp8,fp8,0,0.8440106709798177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,0,1,fp8,fp8,0,4.547573407491048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,0,1,float16,fp8,0,5.052645365397136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,128,1,float16,float16,0,0.8915306727091471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,128,1,float16,fp8,0,0.8918399810791016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,0,1,fp8,fp8,0,2.607797304789225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,0,1,float16,fp8,0,2.81713072458903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,128,1,fp8,fp8,0,0.8356373310089111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,0,1,float16,float16,0,2.7932427724202475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,128,1,float16,float16,0,0.8920693397521973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,0,1,float16,fp8,0,2.7938613891601562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,0,1,fp8,fp8,0,2.603498617808024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,128,1,float16,fp8,0,0.8933119773864746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,0,1,float16,float16,0,2.7971413930257163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,128,1,fp8,fp8,0,0.8401652971903483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,128,1,float16,float16,0,0.8981173038482666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,0,1,fp8,fp8,0,2.6013973553975425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,0,1,float16,fp8,0,2.799290657043457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,128,1,float16,fp8,0,0.8975199858347574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,128,1,fp8,fp8,0,0.8368053436279297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,0,1,float16,float16,0,2.800469398498535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,0,1,float16,fp8,0,2.803354581197103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,0,1,fp8,fp8,0,2.6051146189371743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,128,1,float16,float16,0,3.303098678588867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,128,1,fp8,fp8,0,3.0085652669270835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,128,1,float16,fp8,0,3.3312692642211914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,128,1,float16,float16,0,3.317018508911133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,0,1,float16,float16,0,13.524912516276041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,0,1,fp8,fp8,0,12.392864227294922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,128,1,float16,fp8,0,3.3440640767415366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,0,1,float16,fp8,0,13.704896291097006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,128,1,fp8,fp8,0,3.033466657002767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,0,1,float16,float16,0,13.47537612915039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,128,1,float16,float16,0,3.334202766418457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,128,1,float16,fp8,0,3.3653386433919272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,0,1,fp8,fp8,0,12.41430409749349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,128,1,fp8,fp8,0,3.059610684712728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,0,1,float16,fp8,0,14.027200063069662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,128,1,float16,float16,0,1.953935941060384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,0,1,float16,float16,0,13.699114481608072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,128,1,float16,fp8,0,1.9991787274678547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,128,1,fp8,fp8,0,1.8641494115193684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,0,1,float16,float16,0,7.546730677286784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,0,1,fp8,fp8,0,12.440911610921225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,0,1,float16,fp8,0,14.475696563720703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,128,1,float16,float16,0,1.738298734029134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,0,1,float16,fp8,0,7.14137077331543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,0,1,fp8,fp8,0,6.592949549357097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,128,1,float16,fp8,0,1.7537760734558105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,128,1,fp8,fp8,0,1.596944014231364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,128,1,float16,float16,0,1.7475840250651042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,0,1,float16,float16,0,7.283397038777669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,128,1,float16,fp8,0,1.7617066701253254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,0,1,fp8,fp8,0,6.320298512776692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,0,1,float16,fp8,0,7.044389088948567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,128,1,fp8,fp8,0,1.60753599802653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,128,1,float16,float16,0,1.7542932828267415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,0,1,float16,float16,0,6.829797108968099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,128,1,float16,fp8,0,1.7741066614786785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,0,1,fp8,fp8,0,6.328976313273112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,0,1,float16,fp8,0,6.847839991251628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,128,1,fp8,fp8,0,1.6198666890462239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,128,1,float16,float16,0,1.0716640154520671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,0,1,float16,float16,0,6.991343816121419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,128,1,float16,fp8,0,1.0996267000834148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,0,1,float16,float16,0,3.673301378885905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,0,1,fp8,fp8,0,6.342981338500977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,128,1,fp8,fp8,0,1.0350293318430583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,0,1,float16,fp8,0,7.182037353515625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,128,1,float16,float16,0,0.9716320037841797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,128,1,float16,fp8,0,0.9773920377095541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,0,1,float16,fp8,0,3.7009865442911782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,0,1,fp8,fp8,0,3.4294134775797525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,128,1,fp8,fp8,0,0.8999466896057129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,0,1,float16,float16,0,3.7758026123046875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,128,1,float16,float16,0,0.9756159782409668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,128,1,float16,fp8,0,0.9823359648386637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,0,1,float16,fp8,0,3.5478719075520835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,0,1,fp8,fp8,0,3.2912638982137046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,128,1,fp8,fp8,0,0.9045653343200684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,0,1,float16,float16,0,3.5857280095418296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,128,1,float16,float16,0,0.9794293244679769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,128,1,float16,fp8,0,0.9877706368764242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,0,1,fp8,fp8,0,3.296346664428711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,128,1,fp8,fp8,0,0.9108746846516927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,0,1,float16,fp8,0,3.557461420694987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,0,1,float16,float16,0,3.7427892684936523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,128,1,float16,float16,0,0.7510080337524414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,128,1,float16,fp8,0,0.7507200241088867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,128,1,fp8,fp8,0,0.7014613151550293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,0,1,float16,fp8,0,3.56821346282959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,0,1,float16,float16,0,2.083247979482015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,0,1,fp8,fp8,0,3.303605397542318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,128,1,float16,float16,0,0.7517653306325277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,128,1,float16,fp8,0,0.7520693143208822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,0,1,fp8,fp8,0,1.929946740468343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,0,1,float16,fp8,0,2.083834648132324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,0,1,float16,float16,0,2.06876802444458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,128,1,fp8,fp8,0,0.7049866517384847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,128,1,float16,float16,0,0.7504106362660726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,128,1,float16,fp8,0,0.74945600827535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,0,1,fp8,fp8,0,1.9293759663899739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,0,1,float16,fp8,0,2.070256074269613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,0,1,float16,float16,0,2.0713866551717124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,128,1,fp8,fp8,0,0.7033173243204752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,128,1,float16,float16,0,0.7510186831156412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,0,1,fp8,fp8,0,1.9288907051086426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,0,1,float16,fp8,0,2.0720106760660806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,128,1,float16,fp8,0,0.7519466876983643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,128,1,fp8,fp8,0,0.7011040051778158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,0,1,float16,float16,0,2.0751253763834634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,0,1,float16,fp8,0,2.0736053784688315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,0,1,fp8,fp8,0,1.9308427174886067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,128,1,float16,float16,0,5.186448097229004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,128,1,fp8,fp8,0,4.732015927632649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,128,1,float16,fp8,0,5.225317319234212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,128,1,float16,float16,0,5.256143887837728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,0,1,float16,float16,0,17.899861653645832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,0,1,fp8,fp8,0,16.543418884277344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,128,1,float16,fp8,0,5.288858731587728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,0,1,float16,fp8,0,19.848106384277344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,128,1,fp8,fp8,0,4.813957214355469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,0,1,float16,float16,0,18.968421936035156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,128,1,float16,float16,0,5.2863467534383135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,128,1,float16,fp8,0,5.326149304707845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,0,1,fp8,fp8,0,16.63023503621419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,0,1,float16,fp8,0,18.07209650675456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,128,1,fp8,fp8,0,4.853493372599284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,128,1,float16,float16,0,2.9961280822753906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,0,1,float16,float16,0,19.13650131225586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,128,1,float16,fp8,0,3.0584052403767905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,0,1,float16,float16,0,10.115584055582682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,128,1,fp8,fp8,0,2.8444318771362305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,0,1,fp8,fp8,0,16.66956329345703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,0,1,float16,fp8,0,20.17693837483724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,128,1,float16,float16,0,2.637615998586019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,128,1,float16,fp8,0,2.659994602203369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,0,1,fp8,fp8,0,8.786160151163736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,128,1,fp8,fp8,0,2.406208038330078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,0,1,float16,fp8,0,11.021461486816406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,0,1,float16,float16,0,9.640202840169271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,128,1,float16,float16,0,2.6506293614705405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,128,1,float16,fp8,0,2.672970771789551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,0,1,fp8,fp8,0,8.339402516682943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,0,1,float16,fp8,0,9.14620272318522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,128,1,fp8,fp8,0,2.4240266482035318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,0,1,float16,float16,0,9.965791702270508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,128,1,float16,float16,0,2.6624587376912436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,0,1,float16,fp8,0,9.564693450927734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,0,1,fp8,fp8,0,8.356202443440756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,128,1,float16,fp8,0,2.689045270284017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,128,1,fp8,fp8,0,2.443456013997396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,128,1,float16,float16,0,1.5639039675394695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,0,1,float16,float16,0,9.460378646850586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,128,1,float16,fp8,0,1.6013654073079426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,128,1,fp8,fp8,0,1.4956587155659993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,0,1,float16,float16,0,4.953285217285156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,0,1,fp8,fp8,0,8.375173568725586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,0,1,float16,fp8,0,9.106512069702148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,128,1,float16,float16,0,1.3945706685384114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,128,1,float16,fp8,0,1.4056906700134277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,0,1,float16,fp8,0,4.885509490966797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,0,1,fp8,fp8,0,4.488986651102702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,128,1,fp8,fp8,0,1.281765302022298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,0,1,float16,float16,0,4.743573188781738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,128,1,float16,float16,0,1.3997759819030762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,128,1,float16,fp8,0,1.411237398783366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,0,1,fp8,fp8,0,4.271930694580078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,0,1,float16,fp8,0,5.105546633402507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,128,1,fp8,fp8,0,1.2890400091807048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,0,1,float16,float16,0,4.62775452931722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,128,1,float16,float16,0,1.4068694114685059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,128,1,float16,fp8,0,1.4195520083109539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,0,1,float16,fp8,0,4.6375627517700195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,0,1,fp8,fp8,0,4.281541188557942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,128,1,fp8,fp8,0,1.2978400389353435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,0,1,float16,float16,0,4.641466776529948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,128,1,float16,float16,0,0.8613599936167399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,128,1,float16,fp8,0,0.8835893472035726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,0,1,float16,float16,0,2.5240586598714194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,128,1,fp8,fp8,0,0.8311200141906738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,0,1,fp8,fp8,0,4.289823849995931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,0,1,float16,fp8,0,4.653759956359863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,128,1,float16,float16,0,0.7822186946868896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,0,1,fp8,fp8,0,2.353029410044352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,0,1,float16,fp8,0,2.5493067105611167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,128,1,float16,fp8,0,0.7872052987416586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,128,1,fp8,fp8,0,0.7250239849090576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,0,1,float16,float16,0,2.4315412839253745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,128,1,float16,float16,0,0.7850026289621989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,0,1,float16,fp8,0,2.4225865999857583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,0,1,fp8,fp8,0,2.243061383565267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,128,1,float16,fp8,0,0.7906133333841959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,128,1,fp8,fp8,0,0.7298239866892496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,0,1,float16,float16,0,2.42520538965861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,128,1,float16,float16,0,0.7866826852162679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,0,1,fp8,fp8,0,2.2473546663920083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,0,1,float16,fp8,0,2.5288052558898926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,128,1,float16,fp8,0,0.7943840026855469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,128,1,fp8,fp8,0,0.7322080135345459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,0,1,float16,float16,0,2.4296693801879883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,128,1,float16,float16,0,0.6012373367945353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,128,1,float16,fp8,0,0.6019413471221924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,0,1,fp8,fp8,0,2.2531305948893228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,0,1,float16,float16,0,1.4590080579121907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,0,1,float16,fp8,0,2.436639944712321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,128,1,fp8,fp8,0,0.5642720063527426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,128,1,float16,float16,0,0.6059893369674683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,0,1,fp8,fp8,0,1.351904074350993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,0,1,float16,fp8,0,1.4598453839619954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,128,1,float16,fp8,0,0.6052586634953817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,0,1,float16,float16,0,1.4479519526163738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,128,1,fp8,fp8,0,0.566703995068868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,128,1,float16,float16,0,0.6009173393249512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,0,1,float16,fp8,0,1.4488213857014973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,0,1,fp8,fp8,0,1.351685365041097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,128,1,float16,fp8,0,0.6019999980926514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,0,1,float16,float16,0,1.4516587257385254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,128,1,fp8,fp8,0,0.567903995513916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,128,1,float16,float16,0,0.6046559810638428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,0,1,float16,fp8,0,1.4514880180358887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,0,1,fp8,fp8,0,1.3490667343139648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,128,1,float16,fp8,0,0.6050186554590861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,0,1,float16,float16,0,1.4532426198323567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,128,1,fp8,fp8,0,0.5678346554438273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,0,1,float16,fp8,0,1.4524854024251301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,0,1,fp8,fp8,0,1.3514666557312012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,128,1,float16,float16,0,3.8592427571614585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,128,1,fp8,fp8,0,3.5060640970865884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,128,1,float16,fp8,0,3.8889973958333335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,128,1,float16,float16,0,3.8757546742757163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,0,1,float16,float16,0,11.053904215494791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,0,1,fp8,fp8,0,9.99508285522461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,0,1,float16,fp8,0,10.880096435546875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,128,1,float16,fp8,0,3.910501480102539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,128,1,fp8,fp8,0,3.548442522684733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,0,1,float16,float16,0,11.18387222290039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,128,1,float16,float16,0,3.9016478856404624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,0,1,fp8,fp8,0,10.036981582641602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,0,1,float16,fp8,0,11.652634938557943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,128,1,float16,fp8,0,3.9362026850382485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,128,1,fp8,fp8,0,3.5815467834472656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,128,1,float16,float16,0,2.24233071009318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,0,1,float16,float16,0,12.4224001566569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,0,1,fp8,fp8,0,10.072506586710611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,0,1,float16,fp8,0,11.65008544921875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,128,1,float16,fp8,0,2.291605313618978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,0,1,float16,float16,0,6.615584055582683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,128,1,fp8,fp8,0,2.1281065940856934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,128,1,float16,float16,0,1.9771839777628581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,128,1,float16,fp8,0,1.994965394337972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,0,1,fp8,fp8,0,5.400170644124349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,0,1,float16,fp8,0,6.064512252807617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,128,1,fp8,fp8,0,1.8083252906799316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,0,1,float16,float16,0,5.492698669433594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,128,1,float16,float16,0,1.9874773025512695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,128,1,float16,fp8,0,2.0052587191263833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,0,1,fp8,fp8,0,5.070245424906413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,0,1,float16,fp8,0,5.512336095174153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,128,1,fp8,fp8,0,1.8207786877950032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,0,1,float16,float16,0,5.51414426167806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,128,1,float16,float16,0,1.9968959490458171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,0,1,fp8,fp8,0,5.083296140034993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,0,1,float16,fp8,0,5.5301971435546875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,128,1,float16,fp8,0,2.0174506505330405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,128,1,fp8,fp8,0,1.8350027402242024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,0,1,float16,float16,0,5.543263753255208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,128,1,float16,float16,0,1.1751786867777507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,128,1,float16,fp8,0,1.2042559782663982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,128,1,fp8,fp8,0,1.1264853477478027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,0,1,fp8,fp8,0,5.097061475118001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,0,1,float16,float16,0,3.141594568888346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,0,1,float16,fp8,0,5.553056081136067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,128,1,float16,float16,0,1.086405356725057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,128,1,float16,fp8,0,1.0580106576283772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,0,1,fp8,fp8,0,2.7774505615234375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,0,1,float16,fp8,0,3.0104106267293296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,0,1,float16,float16,0,2.8268585205078125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,128,1,fp8,fp8,0,0.9672319889068604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,128,1,float16,float16,0,1.0543413162231445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,128,1,float16,fp8,0,1.0637386639912922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,0,1,fp8,fp8,0,2.6151787439982095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,0,1,float16,fp8,0,2.8517173131306968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,0,1,float16,float16,0,2.841093381245931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,128,1,fp8,fp8,0,0.9728533426920573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,128,1,float16,float16,0,1.0645493666330974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,0,1,fp8,fp8,0,2.6196160316467285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,0,1,float16,fp8,0,2.8486080169677734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,128,1,float16,fp8,0,1.070698658625285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,128,1,fp8,fp8,0,0.9787840048472086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,0,1,float16,float16,0,2.8859841028849282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,128,1,float16,float16,0,0.6514666477839152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,128,1,float16,fp8,0,0.668287992477417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,0,1,float16,float16,0,1.5711414019266765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,0,1,float16,fp8,0,2.912202517191569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,0,1,fp8,fp8,0,2.6287946701049805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,128,1,fp8,fp8,0,0.6296106576919556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,128,1,float16,float16,0,0.5907466808954874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,0,1,float16,fp8,0,1.589354674021403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,0,1,fp8,fp8,0,1.4740533828735352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,128,1,float16,fp8,0,0.595199982325236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,0,1,float16,float16,0,1.4993866284688313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,128,1,fp8,fp8,0,0.5498026609420776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,128,1,float16,float16,0,0.5925386746724447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,0,1,float16,fp8,0,1.5046292940775554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,0,1,fp8,fp8,0,1.3906453450520833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,128,1,float16,fp8,0,0.5972533226013184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,0,1,float16,float16,0,1.5042239824930828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,128,1,fp8,fp8,0,0.552127997080485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,128,1,float16,float16,0,0.5967679818471273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,0,1,float16,fp8,0,1.5080587069193523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,0,1,fp8,fp8,0,1.3934133847554524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,128,1,float16,fp8,0,0.6010506550470988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,0,1,float16,float16,0,1.5105652809143066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,128,1,fp8,fp8,0,0.556165337562561
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,128,1,float16,float16,0,0.4600106477737427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,0,1,float16,float16,0,0.9399147033691406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,0,1,float16,fp8,0,1.514469305674235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,128,1,float16,fp8,0,0.45892266432444256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,0,1,fp8,fp8,0,1.3971412976582844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,128,1,fp8,fp8,0,0.43119998772939044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,128,1,float16,float16,0,0.4588053226470947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,0,1,fp8,fp8,0,0.8702507019042969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,0,1,float16,fp8,0,0.9394559860229492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,128,1,float16,fp8,0,0.45900265375773114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,0,1,float16,float16,0,0.932042678197225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,128,1,fp8,fp8,0,0.4302560091018677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,0,1,float16,fp8,0,0.9334987004597982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,128,1,float16,float16,0,0.4583146572113037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,0,1,fp8,fp8,0,0.869429349899292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,0,1,float16,float16,0,0.9337440331776937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,128,1,fp8,fp8,0,0.4312906662623088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,128,1,float16,fp8,0,0.45818666617075604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,128,1,float16,float16,0,0.45998934904734295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,0,1,float16,fp8,0,0.9340853691101074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,0,1,fp8,fp8,0,0.8687840302785238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,0,1,float16,float16,0,0.9341013431549072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,128,1,float16,fp8,0,0.45977067947387695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,128,1,fp8,fp8,0,0.43118401368459064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,0,1,float16,fp8,0,0.9354399840037028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,0,1,fp8,fp8,0,0.869488000869751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,128,1,float16,float16,0,5.095669428507487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,128,1,fp8,fp8,0,4.643845240275065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,128,1,float16,fp8,0,5.132325490315755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,0,1,float16,float16,0,10.98028818766276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,0,1,fp8,fp8,0,10.096810658772787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,128,1,float16,float16,0,5.1583201090494795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,0,1,float16,fp8,0,11.01324208577474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,128,1,fp8,fp8,0,4.731472015380859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,128,1,float16,fp8,0,5.196181297302246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,0,1,float16,float16,0,11.070123036702475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,128,1,float16,float16,0,5.193615913391113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,0,1,float16,fp8,0,12.11728032430013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,0,1,fp8,fp8,0,10.180858612060547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,128,1,float16,fp8,0,5.232122739156087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,128,1,fp8,fp8,0,4.768842697143555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,0,1,float16,float16,0,12.075733184814453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,128,1,float16,float16,0,2.9188105265299478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,128,1,float16,fp8,0,2.9729334513346353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,0,1,fp8,fp8,0,10.220485051472982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,0,1,float16,float16,0,6.51848030090332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,0,1,float16,fp8,0,11.150367736816406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,128,1,fp8,fp8,0,2.7653013865152993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,128,1,float16,float16,0,2.557605266571045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,128,1,float16,fp8,0,2.5798239707946777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,0,1,fp8,fp8,0,5.5106455485026045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,0,1,float16,fp8,0,6.005791982014974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,0,1,float16,float16,0,5.511253356933594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,128,1,fp8,fp8,0,2.329360008239746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,128,1,float16,float16,0,2.569573402404785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,0,1,fp8,fp8,0,5.068304061889648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,128,1,float16,fp8,0,2.592346668243408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,0,1,float16,fp8,0,5.538576126098633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,128,1,fp8,fp8,0,2.3463145891825357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,0,1,float16,float16,0,5.530576070149739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,128,1,float16,float16,0,2.586613337198893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,0,1,fp8,fp8,0,5.081962585449219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,0,1,float16,fp8,0,5.55458132425944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,128,1,float16,fp8,0,2.611285368601481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,128,1,fp8,fp8,0,2.3653759956359863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,0,1,float16,float16,0,5.557349522908528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,128,1,float16,float16,0,1.4945920308430989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,128,1,float16,fp8,0,1.527626673380534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,0,1,float16,float16,0,3.024314562479655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,0,1,float16,fp8,0,5.581823984781901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,0,1,fp8,fp8,0,5.103973388671875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,128,1,fp8,fp8,0,1.4216160774230957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,128,1,float16,float16,0,1.3233173688252766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,0,1,fp8,fp8,0,2.808234532674154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,0,1,float16,fp8,0,3.0569547017415366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,128,1,float16,fp8,0,1.3338079452514648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,0,1,float16,float16,0,2.811744054158529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,128,1,fp8,fp8,0,1.2107573350270588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,128,1,float16,float16,0,1.3284746805826824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,0,1,float16,fp8,0,2.8239145278930664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,0,1,fp8,fp8,0,2.5903306007385254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,128,1,fp8,fp8,0,1.2195680141448975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,0,1,float16,float16,0,2.822805404663086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,128,1,float16,fp8,0,1.3416213989257812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,128,1,float16,float16,0,1.3373120625813801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,0,1,float16,fp8,0,2.837285359700521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,0,1,fp8,fp8,0,2.598485310872396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,0,1,float16,float16,0,2.8362401326497397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,128,1,float16,fp8,0,1.3510826428731282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,128,1,fp8,fp8,0,1.2294080257415771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,128,1,float16,float16,0,0.7900319894154867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,128,1,float16,fp8,0,0.808784008026123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,0,1,float16,fp8,0,2.8492746353149414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,0,1,float16,float16,0,1.565824031829834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,0,1,fp8,fp8,0,2.6071093877156577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,128,1,fp8,fp8,0,0.756218671798706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,128,1,float16,float16,0,0.7057173252105713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,0,1,fp8,fp8,0,1.4602346420288086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,0,1,float16,fp8,0,1.5851467450459797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,128,1,float16,fp8,0,0.7117493152618408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,0,1,float16,float16,0,1.462058703104655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,128,1,fp8,fp8,0,0.6508479913075765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,128,1,float16,float16,0,0.7094826698303223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,0,1,fp8,fp8,0,1.35150941212972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,0,1,float16,fp8,0,1.4693172772725422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,128,1,float16,fp8,0,0.7158453464508057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,0,1,float16,float16,0,1.470842679341634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,128,1,fp8,fp8,0,0.6552266677220663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,0,1,float16,fp8,0,1.4743946393330891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,128,1,float16,float16,0,0.7134133179982504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,0,1,fp8,fp8,0,1.356592019399007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,0,1,float16,float16,0,1.4746294021606445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,128,1,fp8,fp8,0,0.6600480079650879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,128,1,float16,fp8,0,0.7199947039286295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,128,1,float16,float16,0,0.4406026601791382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,0,1,float16,float16,0,0.8393119970957438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,0,1,float16,fp8,0,1.481594721476237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,128,1,float16,fp8,0,0.45179200172424316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,0,1,fp8,fp8,0,1.3611680666605632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,128,1,fp8,fp8,0,0.42686935265858966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,128,1,float16,float16,0,0.3986399968465169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,0,1,float16,fp8,0,0.8510826428731283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,0,1,fp8,fp8,0,0.7887840270996094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,128,1,float16,fp8,0,0.40053868293762207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,0,1,float16,float16,0,0.7905440330505371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,128,1,fp8,fp8,0,0.3737279971440633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,0,1,float16,fp8,0,0.7927306493123373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,128,1,float16,float16,0,0.4022560119628906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,0,1,fp8,fp8,0,0.7339200178782145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,0,1,float16,float16,0,0.7923999627431234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,128,1,float16,fp8,0,0.4050133228302002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,128,1,fp8,fp8,0,0.3746826648712158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,0,1,float16,fp8,0,0.7955040136973063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,128,1,float16,float16,0,0.40370134512583417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,0,1,fp8,fp8,0,0.7374986807505289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,128,1,float16,fp8,0,0.4083893299102783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,128,1,fp8,fp8,0,0.37805867195129395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,0,1,float16,float16,0,0.7968746821085612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,128,1,float16,float16,0,0.3150346676508586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,0,1,fp8,fp8,0,0.7398239771525065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,0,1,float16,fp8,0,0.8014079729715983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,128,1,float16,fp8,0,0.3128160039583842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,128,1,fp8,fp8,0,0.29184534152348834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,0,1,float16,float16,0,0.525488018989563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,128,1,float16,float16,0,0.31063999732335407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,0,1,fp8,fp8,0,0.48429866631825763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,0,1,float16,fp8,0,0.5263839960098267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,128,1,float16,fp8,0,0.3114773432413737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,128,1,fp8,fp8,0,0.2919573386510213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,0,1,float16,float16,0,0.5190080006917318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,128,1,float16,float16,0,0.311354657014211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,0,1,fp8,fp8,0,0.48454399903615314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,0,1,float16,fp8,0,0.5208640098571777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,128,1,float16,fp8,0,0.310479998588562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,128,1,fp8,fp8,0,0.29176533222198486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,0,1,float16,float16,0,0.5206293265024821
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,128,1,float16,float16,0,0.3123253385225932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,0,1,fp8,fp8,0,0.4843413432439168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,0,1,float16,fp8,0,0.5194720029830933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,128,1,fp8,fp8,0,0.2916106581687927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,128,1,float16,fp8,0,0.31014400720596313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,0,1,float16,float16,0,0.5214186509450277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,0,1,fp8,fp8,0,0.48418132464090985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,0,1,float16,fp8,0,0.5205066601435343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,128,1,float16,float16,0,3.782719930013021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,128,1,fp8,fp8,0,3.4360745747884116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,128,1,float16,fp8,0,3.810890515645345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,0,1,float16,float16,0,6.912864049275716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,128,1,float16,float16,0,3.8056427637736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,0,1,fp8,fp8,0,6.334357579549153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,0,1,float16,fp8,0,6.940453211466472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,128,1,float16,fp8,0,3.837984085083008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,128,1,fp8,fp8,0,3.478794733683268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,0,1,float16,float16,0,6.94645881652832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,128,1,float16,float16,0,3.8308372497558594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,0,1,float16,fp8,0,6.974847793579102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,0,1,fp8,fp8,0,6.373578389485677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,0,1,float16,float16,0,6.977866490681966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,128,1,fp8,fp8,0,3.508864084879557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,128,1,float16,fp8,0,3.8627198537190757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,128,1,float16,float16,0,2.1835734049479165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,0,1,float16,fp8,0,7.008650461832683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,0,1,float16,float16,0,3.796095848083496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,128,1,float16,fp8,0,2.2272106806437173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,0,1,fp8,fp8,0,6.406976064046224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,128,1,fp8,fp8,0,2.067354679107666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,128,1,float16,float16,0,1.9182987213134766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,0,1,float16,fp8,0,3.841541290283203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,0,1,fp8,fp8,0,3.5302772521972656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,0,1,float16,float16,0,3.492389361063639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,128,1,fp8,fp8,0,1.7471253077189128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,128,1,float16,fp8,0,1.935530662536621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,128,1,float16,float16,0,1.9295573234558105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,0,1,float16,fp8,0,3.5834080378214517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,0,1,fp8,fp8,0,3.2024799982706704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,0,1,float16,float16,0,3.5069545110066733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,128,1,fp8,fp8,0,1.7593065897623699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,128,1,float16,fp8,0,1.9467093149820964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,128,1,float16,float16,0,1.940351963043213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,0,1,float16,fp8,0,3.5248212814331055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,0,1,fp8,fp8,0,3.217002550760905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,0,1,float16,float16,0,3.5241705576578775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,128,1,float16,fp8,0,1.9586025873819988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,128,1,fp8,fp8,0,1.7743840217590332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,128,1,float16,float16,0,1.1253706614176433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,0,1,float16,fp8,0,3.5410451889038086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,0,1,float16,float16,0,1.9373280207316081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,128,1,float16,fp8,0,1.1499733130137126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,0,1,fp8,fp8,0,3.231029192606608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,128,1,fp8,fp8,0,1.0696053504943848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,128,1,float16,float16,0,0.9961280028025309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,0,1,fp8,fp8,0,1.8103200594584148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,0,1,float16,fp8,0,1.9643786748250325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,128,1,float16,fp8,0,1.0050826867421467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,0,1,float16,float16,0,1.7924426396687825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,128,1,fp8,fp8,0,0.9115253289540609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,0,1,float16,fp8,0,1.8004159927368164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,128,1,float16,float16,0,1.0007893244425456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,0,1,fp8,fp8,0,1.6488587061564128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,0,1,float16,float16,0,1.7988905906677246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,128,1,float16,fp8,0,1.0097599824269612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,128,1,fp8,fp8,0,0.9178880055745443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,0,1,float16,fp8,0,1.8097440401713054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,128,1,float16,float16,0,1.00654403368632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,0,1,fp8,fp8,0,1.6536266009012859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,0,1,float16,float16,0,1.8060266176859539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,128,1,fp8,fp8,0,0.9252959887186686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,128,1,float16,fp8,0,1.0158987045288086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,128,1,float16,float16,0,0.596010684967041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,0,1,float16,fp8,0,1.816746711730957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,0,1,fp8,fp8,0,1.6606666247049968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,0,1,float16,float16,0,1.0115573406219482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,128,1,float16,fp8,0,0.6122239828109741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,128,1,fp8,fp8,0,0.5719840129216512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,0,1,float16,fp8,0,1.0341920057932537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,128,1,float16,float16,0,0.5331306854883829
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,0,1,fp8,fp8,0,0.9507520198822021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,128,1,float16,fp8,0,0.5381226539611816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,128,1,fp8,fp8,0,0.4933120012283325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,0,1,float16,float16,0,0.9409013589223226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,128,1,float16,float16,0,0.5357973178227743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,0,1,fp8,fp8,0,0.8688639799753824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,0,1,float16,fp8,0,0.9459359645843506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,128,1,fp8,fp8,0,0.49571200211842853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,128,1,float16,fp8,0,0.541429320971171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,0,1,float16,float16,0,0.9436373710632324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,0,1,fp8,fp8,0,0.8719147046407064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,128,1,float16,float16,0,0.5377920071283976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,0,1,float16,fp8,0,0.9496533075968424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,128,1,fp8,fp8,0,0.49933334191640216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,128,1,float16,fp8,0,0.543936014175415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,0,1,float16,float16,0,0.947551965713501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,128,1,float16,float16,0,0.3356959819793701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,0,1,float16,fp8,0,0.9544266859690348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,128,1,float16,fp8,0,0.3439519802729289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,0,1,fp8,fp8,0,0.8755199909210205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,0,1,float16,float16,0,0.5515040159225464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,128,1,fp8,fp8,0,0.3245493372281392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,128,1,float16,float16,0,0.30007465680440265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,0,1,fp8,fp8,0,0.5215253432591757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,0,1,float16,fp8,0,0.5610613425572714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,128,1,float16,fp8,0,0.3019893368085225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,128,1,fp8,fp8,0,0.2834506630897522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,0,1,float16,float16,0,0.5128053426742554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,0,1,float16,fp8,0,0.5141173203786215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,0,1,fp8,fp8,0,0.48045865694681805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,128,1,float16,float16,0,0.30219199260075885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,128,1,float16,fp8,0,0.30426132678985596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,0,1,float16,float16,0,0.5176000197728475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,128,1,fp8,fp8,0,0.28575466076533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,0,1,float16,fp8,0,0.5176373322804769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,128,1,float16,float16,0,0.3067359924316406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,0,1,fp8,fp8,0,0.4804533322652181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,0,1,float16,float16,0,0.518336017926534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,128,1,float16,fp8,0,0.3085973262786865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,128,1,fp8,fp8,0,0.2882773280143738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,0,1,float16,fp8,0,0.5219893455505371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,128,1,float16,float16,0,0.2387626568476359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,0,1,fp8,fp8,0,0.4843093156814575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,0,1,float16,float16,0,0.36000533898671466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,128,1,float16,fp8,0,0.2386080026626587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,128,1,fp8,fp8,0,0.22452799479166666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,0,1,float16,fp8,0,0.3587413231531779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,0,1,fp8,fp8,0,0.3307093381881714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,128,1,float16,float16,0,0.23452266057332358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,0,1,float16,float16,0,0.35257065296173096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,128,1,float16,fp8,0,0.23553599913915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,128,1,fp8,fp8,0,0.2201706568400065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,0,1,float16,fp8,0,0.3530506690343221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,0,1,fp8,fp8,0,0.32977066437403363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,128,1,float16,float16,0,0.23471999168395996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,0,1,float16,float16,0,0.35341334342956543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,128,1,float16,fp8,0,0.23458667596181235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,128,1,fp8,fp8,0,0.22409600019454956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,0,1,float16,fp8,0,0.35201601187388104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,128,1,float16,float16,0,0.23534933725992838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,0,1,fp8,fp8,0,0.32712000608444214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,0,1,float16,float16,0,0.3537919918696086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,128,1,float16,fp8,0,0.23482666412989298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,128,1,fp8,fp8,0,0.2209440072377523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,0,1,float16,fp8,0,0.3527359962463379
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,0,1,fp8,fp8,0,0.3280319968859355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,128,1,fp8,fp8,0,4.558714548746745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,128,1,float16,float16,0,5.0425920486450195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,128,1,float16,fp8,0,5.066117286682129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,0,1,float16,float16,0,7.499722798665364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,128,1,float16,float16,0,5.171434720357259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,0,1,fp8,fp8,0,6.827552159627278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,0,1,float16,fp8,0,7.527738571166992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,0,1,float16,float16,0,7.651658376057942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,128,1,float16,fp8,0,5.144837379455566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,128,1,fp8,fp8,0,4.612336158752441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,0,1,float16,fp8,0,7.607450485229492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,128,1,float16,float16,0,5.190762519836426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,0,1,fp8,fp8,0,6.885936101277669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,0,1,float16,float16,0,7.657578786214192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,128,1,float16,fp8,0,5.178586641947429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,128,1,fp8,fp8,0,4.650762557983398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,128,1,float16,float16,0,2.8712692260742188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,0,1,float16,fp8,0,7.647045135498047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,0,1,float16,float16,0,4.147322654724121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,0,1,fp8,fp8,0,6.924074808756511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,128,1,float16,fp8,0,2.9150400161743164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,128,1,fp8,fp8,0,2.7135626475016275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,128,1,float16,float16,0,2.5070239702860513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,0,1,float16,fp8,0,4.191770553588867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,0,1,fp8,fp8,0,3.863626797993978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,0,1,float16,float16,0,3.742725372314453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,128,1,fp8,fp8,0,2.2753547032674155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,128,1,float16,fp8,0,2.5287040074666343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,0,1,float16,fp8,0,3.764095942179362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,128,1,float16,float16,0,2.5240586598714194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,0,1,fp8,fp8,0,3.4154399236043296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,128,1,float16,fp8,0,2.545413335164388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,128,1,fp8,fp8,0,2.2935840288798013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,0,1,float16,float16,0,3.7600959142049155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,128,1,float16,float16,0,2.541253407796224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,0,1,float16,fp8,0,3.7831945419311523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,0,1,fp8,fp8,0,3.4338932037353516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,0,1,float16,float16,0,3.783749262491862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,128,1,float16,fp8,0,2.5645333925882974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,128,1,fp8,fp8,0,2.314341386159261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,128,1,float16,float16,0,1.4576053619384766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,0,1,float16,fp8,0,3.8040266036987305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,0,1,fp8,fp8,0,3.457717259724935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,0,1,float16,float16,0,2.099722703297933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,128,1,float16,fp8,0,1.4852159818013508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,128,1,fp8,fp8,0,1.377504030863444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,0,1,float16,fp8,0,2.1309706370035806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,0,1,fp8,fp8,0,1.9595200220743816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,128,1,float16,float16,0,1.2792692979176838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,0,1,float16,float16,0,1.903999964396159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,128,1,fp8,fp8,0,1.165776014328003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,128,1,float16,fp8,0,1.2933493455251057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,0,1,float16,fp8,0,1.9140106836954753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,0,1,fp8,fp8,0,1.7409332593282063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,128,1,float16,float16,0,1.2890880107879639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,0,1,float16,float16,0,1.9159146944681804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,128,1,float16,fp8,0,1.3000373045603435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,128,1,fp8,fp8,0,1.1742826302846272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,0,1,float16,fp8,0,1.92413330078125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,128,1,float16,float16,0,1.2969386577606201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,0,1,fp8,fp8,0,1.750373363494873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,0,1,float16,float16,0,1.9226667086283367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,128,1,float16,fp8,0,1.3085227012634277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,128,1,fp8,fp8,0,1.1846986611684163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,128,1,float16,float16,0,0.7535200119018555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,0,1,float16,fp8,0,1.9370506604512532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,0,1,float16,float16,0,1.080458641052246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,0,1,fp8,fp8,0,1.7590239842732747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,128,1,float16,fp8,0,0.7715360323588053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,128,1,fp8,fp8,0,0.7188639640808105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,0,1,float16,fp8,0,1.0983253320058186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,128,1,float16,float16,0,0.6671520074208578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,0,1,fp8,fp8,0,1.0123733679453533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,0,1,float16,float16,0,0.9859999815622965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,128,1,float16,fp8,0,0.6740372975667318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,128,1,fp8,fp8,0,0.61190398534139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,0,1,float16,fp8,0,0.9917120138804117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,128,1,float16,float16,0,0.6716427008310953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,0,1,fp8,fp8,0,0.9046560128529867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,0,1,float16,float16,0,0.98908797899882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,128,1,float16,fp8,0,0.6784106890360514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,128,1,fp8,fp8,0,0.6170080105463663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,0,1,float16,fp8,0,0.9960160255432129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,128,1,float16,float16,0,0.675055980682373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,0,1,fp8,fp8,0,0.9082187016805013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,0,1,float16,float16,0,0.9946133295694987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,128,1,float16,fp8,0,0.6814026832580566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,128,1,fp8,fp8,0,0.6197919845581055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,0,1,float16,fp8,0,1.0012799898783367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,128,1,float16,float16,0,0.40349332491556805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,0,1,float16,float16,0,0.5727146863937378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,0,1,fp8,fp8,0,0.9125226338704427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,128,1,float16,fp8,0,0.41303467750549316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,128,1,fp8,fp8,0,0.387935996055603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,0,1,float16,fp8,0,0.5829973220825195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,128,1,float16,float16,0,0.35875733693440753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,0,1,fp8,fp8,0,0.5391199986139933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,0,1,float16,float16,0,0.5242400169372559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,128,1,float16,fp8,0,0.3617440064748128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,128,1,fp8,fp8,0,0.33479468027750653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,0,1,float16,fp8,0,0.5258453289667765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,0,1,fp8,fp8,0,0.48598400751749676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,128,1,float16,float16,0,0.3609706560770671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,0,1,float16,float16,0,0.5254826545715332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,128,1,float16,fp8,0,0.36532266934712726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,128,1,fp8,fp8,0,0.33691199620564777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,0,1,float16,fp8,0,0.5291039943695068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,128,1,float16,float16,0,0.3630666732788086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,0,1,fp8,fp8,0,0.48951466878255206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,0,1,float16,float16,0,0.528661330540975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,128,1,float16,fp8,0,0.3672106663386027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,128,1,fp8,fp8,0,0.33859201272328693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,128,1,float16,float16,0,0.2302186687787374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,0,1,float16,fp8,0,0.5317279895146688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,0,1,float16,float16,0,0.3195786674817403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,0,1,fp8,fp8,0,0.49004268646240234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,128,1,float16,fp8,0,0.235642671585083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,128,1,fp8,fp8,0,0.22214933236440024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,0,1,float16,fp8,0,0.32548266649246216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,0,1,fp8,fp8,0,0.3036693334579468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,128,1,float16,float16,0,0.20359466473261514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,0,1,float16,float16,0,0.2898293336232503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,128,1,fp8,fp8,0,0.1918826699256897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,128,1,float16,fp8,0,0.20565332969029745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,0,1,float16,fp8,0,0.2922826608022054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,0,1,fp8,fp8,0,0.2750399907430013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,128,1,float16,float16,0,0.20454933245976767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,0,1,float16,float16,0,0.2916906674702962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,128,1,float16,fp8,0,0.20681599775950113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,128,1,fp8,fp8,0,0.1956640084584554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,0,1,float16,fp8,0,0.29293866952260333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,0,1,fp8,fp8,0,0.2757973273595174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,128,1,float16,float16,0,0.2057973345120748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,128,1,float16,fp8,0,0.20677334070205688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,0,1,float16,float16,0,0.29336533943812054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,128,1,fp8,fp8,0,0.19632534186045328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,128,1,float16,float16,0,0.1665546695391337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,0,1,float16,fp8,0,0.2946453293164571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,0,1,fp8,fp8,0,0.27745600541432697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,128,1,float16,fp8,0,0.16661333044370016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,128,1,fp8,fp8,0,0.15501866738001505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,0,1,float16,float16,0,0.21768534183502197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,128,1,float16,float16,0,0.16285333037376404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,0,1,float16,fp8,0,0.21766400337219238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,0,1,fp8,fp8,0,0.20126932859420776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,128,1,float16,fp8,0,0.16274133324623108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,128,1,fp8,fp8,0,0.15241066614786783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,0,1,float16,float16,0,0.21394133567810059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,128,1,float16,float16,0,0.16251200437545776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,0,1,fp8,fp8,0,0.199727992216746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,0,1,float16,fp8,0,0.21279466152191162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,128,1,fp8,fp8,0,0.1523146629333496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,128,1,float16,fp8,0,0.16294399897257486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,0,1,float16,float16,0,0.2132586638132731
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,128,1,float16,float16,0,0.16261333227157593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,0,1,fp8,fp8,0,0.1980746587117513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,0,1,float16,fp8,0,0.21173866589864096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,128,1,float16,fp8,0,0.1627253293991089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,128,1,fp8,fp8,0,0.15254400173823038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,0,1,float16,float16,0,0.21306665738423666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,0,1,fp8,fp8,0,0.19900800784428915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,0,1,float16,fp8,0,0.21344000101089478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,128,1,float16,float16,0,3.7388534545898438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,128,1,float16,fp8,0,3.764981269836426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,0,1,float16,float16,0,4.975114822387695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,128,1,fp8,fp8,0,3.3797972997029624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,0,1,fp8,fp8,0,4.497791926066081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,0,1,float16,fp8,0,4.974570592244466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,128,1,float16,float16,0,3.7757228215535483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,0,1,float16,float16,0,4.990138689676921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,128,1,fp8,fp8,0,3.423290570576986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,128,1,float16,fp8,0,3.7987521489461265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,0,1,float16,fp8,0,5.010335922241211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,128,1,float16,float16,0,3.7981014251708984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,0,1,fp8,fp8,0,4.53986136118571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,0,1,float16,float16,0,5.013930638631185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,128,1,float16,fp8,0,3.818709373474121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,128,1,fp8,fp8,0,3.4471521377563477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,128,1,float16,float16,0,2.1496960322062173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,0,1,float16,fp8,0,5.036202748616536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,0,1,fp8,fp8,0,4.563045183817546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,0,1,float16,float16,0,2.7874987920125327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,128,1,float16,fp8,0,2.18614927927653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,128,1,fp8,fp8,0,2.0273067156473794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,128,1,float16,float16,0,1.8764853477478027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,0,1,float16,fp8,0,2.8340158462524414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,0,1,fp8,fp8,0,2.596901257832845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,0,1,float16,float16,0,2.486783981323242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,128,1,float16,fp8,0,1.8938719431559246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,128,1,fp8,fp8,0,1.7037173906962078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,0,1,float16,fp8,0,2.504586696624756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,128,1,float16,float16,0,1.8904959360758464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,0,1,fp8,fp8,0,2.2653600374857583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,0,1,float16,float16,0,2.502522627512614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,128,1,float16,fp8,0,1.907109260559082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,128,1,fp8,fp8,0,1.718282699584961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,0,1,float16,fp8,0,2.5188533465067544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,0,1,fp8,fp8,0,2.279973347981771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,128,1,float16,float16,0,1.9020053545633953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,0,1,float16,float16,0,2.5145440101623535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,128,1,float16,fp8,0,1.9218026796976726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,128,1,fp8,fp8,0,1.7332053184509277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,0,1,float16,fp8,0,2.533583958943685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,128,1,float16,float16,0,1.0954293409983318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,0,1,float16,float16,0,1.4177974065144856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,0,1,fp8,fp8,0,2.2938453356424966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,128,1,fp8,fp8,0,1.0361333688100178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,128,1,float16,fp8,0,1.117146650950114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,0,1,float16,fp8,0,1.438037395477295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,0,1,fp8,fp8,0,1.3227787017822266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,128,1,float16,float16,0,0.9615626335144043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,0,1,float16,float16,0,1.271615982055664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,128,1,float16,fp8,0,0.9702346324920654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,128,1,fp8,fp8,0,0.8758613268534342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,0,1,float16,fp8,0,1.2792800267537434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,0,1,fp8,fp8,0,1.160757303237915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,128,1,float16,float16,0,0.9681546688079834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,0,1,float16,float16,0,1.2764373620351155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,128,1,float16,fp8,0,0.977194627126058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,128,1,fp8,fp8,0,0.8842879931131998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,0,1,float16,fp8,0,1.2864853541056316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,128,1,float16,float16,0,0.9743999640146891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,0,1,fp8,fp8,0,1.1665493647257488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,0,1,float16,float16,0,1.285423994064331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,128,1,float16,fp8,0,0.9822026888529459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,128,1,fp8,fp8,0,0.890666643778483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,0,1,float16,fp8,0,1.2947359879811604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,128,1,float16,float16,0,0.5696959892908732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,0,1,fp8,fp8,0,1.1746026674906414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,0,1,float16,float16,0,0.7353546619415283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,128,1,fp8,fp8,0,0.5417600075403849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,128,1,float16,fp8,0,0.5822346607844034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,128,1,float16,float16,0,0.5026400089263916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,0,1,float16,fp8,0,0.7480746905008951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,0,1,fp8,fp8,0,0.6890933513641357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,128,1,float16,fp8,0,0.5078986485799154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,128,1,fp8,fp8,0,0.46214401721954346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,0,1,float16,float16,0,0.6612319946289062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,128,1,float16,float16,0,0.5069973468780518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,0,1,fp8,fp8,0,0.6085226535797119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,0,1,float16,fp8,0,0.6643840074539185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,128,1,fp8,fp8,0,0.4654560089111328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,128,1,float16,fp8,0,0.5119200150171915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,0,1,float16,float16,0,0.6651039918263754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,128,1,float16,float16,0,0.5104639927546183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,0,1,fp8,fp8,0,0.61135466893514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,0,1,float16,fp8,0,0.6698773701985677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,128,1,float16,fp8,0,0.5134826501210531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,128,1,fp8,fp8,0,0.4699519872665405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,0,1,float16,float16,0,0.6680693626403809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,128,1,float16,float16,0,0.30606400966644287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,0,1,fp8,fp8,0,0.6139786640803019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,0,1,float16,fp8,0,0.6739412943522135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,0,1,float16,float16,0,0.3921866814295451
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,128,1,float16,fp8,0,0.31437333424886066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,128,1,fp8,fp8,0,0.2952959934870402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,0,1,fp8,fp8,0,0.3712853193283081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,128,1,float16,float16,0,0.26928534110387164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,0,1,float16,fp8,0,0.40109864870707196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,0,1,float16,float16,0,0.35277334849039715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,128,1,fp8,fp8,0,0.25281065702438354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,128,1,float16,fp8,0,0.27318400144577026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,0,1,fp8,fp8,0,0.33143999179204303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,0,1,float16,fp8,0,0.35470934708913165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,128,1,float16,float16,0,0.27194132407506305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,0,1,float16,float16,0,0.3551199833552043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,128,1,float16,fp8,0,0.2743413249651591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,128,1,fp8,fp8,0,0.2574186722437541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,0,1,float16,fp8,0,0.35791468620300293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,0,1,fp8,fp8,0,0.33153067032496136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,128,1,float16,float16,0,0.27475200096766156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,0,1,float16,float16,0,0.35755733648935956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,128,1,float16,fp8,0,0.27754666407903034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,128,1,fp8,fp8,0,0.2569813330968221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,0,1,float16,fp8,0,0.36161065101623535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,128,1,float16,float16,0,0.17704000075658163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,0,1,fp8,fp8,0,0.3352320194244385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,0,1,float16,float16,0,0.22342934211095175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,128,1,float16,fp8,0,0.18147200345993042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,128,1,fp8,fp8,0,0.17069866259892783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,0,1,fp8,fp8,0,0.213536004225413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,0,1,float16,fp8,0,0.22792534033457437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,128,1,float16,float16,0,0.15466666221618652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,0,1,float16,float16,0,0.1995946764945984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,128,1,float16,fp8,0,0.15498133500417074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,128,1,fp8,fp8,0,0.14442132910092673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,0,1,float16,fp8,0,0.201855997244517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,0,1,fp8,fp8,0,0.18502932786941528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,128,1,float16,float16,0,0.15656532843907675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,128,1,float16,fp8,0,0.15660267074902853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,0,1,float16,float16,0,0.20227199792861938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,128,1,fp8,fp8,0,0.1443946659564972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,0,1,float16,fp8,0,0.20129066705703735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,0,1,fp8,fp8,0,0.18622400363286337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,128,1,float16,float16,0,0.1572213371594747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,0,1,float16,float16,0,0.2017866571744283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,128,1,float16,fp8,0,0.15643200278282166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,128,1,fp8,fp8,0,0.1478506624698639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,0,1,float16,fp8,0,0.2034239967664083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,0,1,fp8,fp8,0,0.1893333395322164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,128,1,float16,float16,0,0.12593600153923035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,0,1,float16,float16,0,0.15361066659291586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,128,1,float16,fp8,0,0.1253866652647654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,128,1,fp8,fp8,0,0.11970667044321696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,0,1,float16,fp8,0,0.15421332915623984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,0,1,fp8,fp8,0,0.14475199580192566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,128,1,float16,float16,0,0.12364266316095988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,0,1,float16,float16,0,0.15029333035151163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,128,1,float16,fp8,0,0.12396267056465149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,128,1,fp8,fp8,0,0.11774933338165283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,0,1,fp8,fp8,0,0.14274133245150247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,0,1,float16,fp8,0,0.1506239970525106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,128,1,float16,float16,0,0.12369599938392639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,0,1,float16,float16,0,0.15178666512171426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,128,1,float16,fp8,0,0.12391466895739238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,128,1,fp8,fp8,0,0.11771733562151591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,0,1,float16,fp8,0,0.15026666720708212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,0,1,fp8,fp8,0,0.14273066322008768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,128,1,float16,float16,0,0.12365866700808208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,128,1,float16,fp8,0,0.12350933750470479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,0,1,float16,float16,0,0.1516800026098887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,128,1,fp8,fp8,0,0.11768000324567159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,0,1,float16,fp8,0,0.15081066886583963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,0,1,fp8,fp8,0,0.14274666706720987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,128,1,fp8,fp8,0,4.253477414449056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,128,1,float16,float16,0,4.500117301940918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,128,1,float16,fp8,0,4.495226542154948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,0,1,float16,float16,0,5.315653483072917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,0,1,fp8,fp8,0,4.960879961649577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,0,1,float16,fp8,0,5.269130706787109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,128,1,float16,float16,0,4.625706672668457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,128,1,float16,fp8,0,4.617061297098796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,128,1,fp8,fp8,0,4.566992123921712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,0,1,float16,float16,0,5.449029286702474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,0,1,float16,fp8,0,5.403733571370442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,128,1,float16,float16,0,4.654677391052246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,0,1,fp8,fp8,0,5.285824139912923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,0,1,float16,float16,0,5.484687805175781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,128,1,float16,fp8,0,4.6663252512613935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,128,1,fp8,fp8,0,4.54421329498291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,128,1,float16,float16,0,2.585557301839193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,0,1,float16,fp8,0,5.439402898152669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,0,1,fp8,fp8,0,5.274976094563802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,128,1,float16,fp8,0,2.510848045349121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,0,1,float16,float16,0,3.002511978149414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,128,1,fp8,fp8,0,2.445258617401123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,0,1,float16,fp8,0,2.9233919779459634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,128,1,float16,float16,0,2.2594186464945474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,0,1,fp8,fp8,0,2.816570599873861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,0,1,float16,float16,0,2.6549812952677407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,128,1,float16,fp8,0,2.2528533935546875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,128,1,fp8,fp8,0,2.1217172940572104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,0,1,float16,fp8,0,2.647989273071289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,0,1,fp8,fp8,0,2.4885387420654297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,128,1,float16,float16,0,2.27292267481486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,0,1,float16,float16,0,2.6694507598876953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,128,1,float16,fp8,0,2.268394629160563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,128,1,fp8,fp8,0,2.2537172635396323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,0,1,float16,fp8,0,2.666330655415853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,0,1,fp8,fp8,0,2.619765281677246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,128,1,float16,float16,0,2.274634679158529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,0,1,float16,float16,0,2.67244815826416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,128,1,float16,fp8,0,2.2699947357177734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,128,1,fp8,fp8,0,2.254591941833496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,128,1,float16,float16,0,1.257141351699829
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,0,1,float16,fp8,0,2.660559972127279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,0,1,fp8,fp8,0,2.6243467330932617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,0,1,float16,float16,0,1.4552693367004395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,128,1,float16,fp8,0,1.2081066767374675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,128,1,fp8,fp8,0,1.2191306749979656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,0,1,float16,fp8,0,1.4227892557779949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,0,1,fp8,fp8,0,1.411296049753825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,128,1,float16,float16,0,1.1391572952270508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,0,1,float16,float16,0,1.335920015970866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,128,1,float16,fp8,0,1.1359306971232097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,128,1,fp8,fp8,0,1.0449120203653972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,0,1,float16,fp8,0,1.3372267087300618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,0,1,fp8,fp8,0,1.2303040027618408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,128,1,float16,float16,0,1.1475253105163574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,0,1,float16,float16,0,1.3491573333740234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,128,1,float16,fp8,0,1.1422826449076335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,128,1,fp8,fp8,0,1.0966133276621501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,0,1,float16,fp8,0,1.3479466438293457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,0,1,fp8,fp8,0,1.285365343093872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,128,1,float16,float16,0,1.1451146602630615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,0,1,float16,float16,0,1.3450613021850586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,128,1,float16,fp8,0,1.1392693519592285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,128,1,fp8,fp8,0,1.0733706951141357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,0,1,float16,fp8,0,1.3420480092366536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,128,1,float16,float16,0,0.6254773139953613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,0,1,fp8,fp8,0,1.2737173239390056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,0,1,float16,float16,0,0.7370719909667969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,128,1,float16,fp8,0,0.6134399970372518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,128,1,fp8,fp8,0,0.6174826622009277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,0,1,float16,fp8,0,0.7227253119150797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,0,1,fp8,fp8,0,0.7132799625396729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,128,1,float16,float16,0,0.5796266794204712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,0,1,float16,float16,0,0.6825386683146158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,128,1,float16,fp8,0,0.5784480174382528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,128,1,fp8,fp8,0,0.5320106744766235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,0,1,float16,fp8,0,0.6798453330993652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,128,1,float16,float16,0,0.581712007522583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,0,1,fp8,fp8,0,0.6248799959818522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,0,1,float16,float16,0,0.6872266928354899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,128,1,float16,fp8,0,0.5826079845428467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,128,1,fp8,fp8,0,0.5419840017954508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,0,1,float16,fp8,0,0.6841119925181071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,128,1,float16,float16,0,0.5825920104980469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,0,1,fp8,fp8,0,0.6327733198801676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,0,1,float16,float16,0,0.6851253509521484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,128,1,float16,fp8,0,0.5804693301518759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,128,1,fp8,fp8,0,0.5451253255208334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,0,1,float16,fp8,0,0.68449600537618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,128,1,float16,float16,0,0.32681065797805786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,0,1,float16,float16,0,0.3845119873682658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,0,1,fp8,fp8,0,0.641045331954956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,128,1,float16,fp8,0,0.3205546736717224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,128,1,fp8,fp8,0,0.31942399342854816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,0,1,float16,fp8,0,0.37703998883565265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,128,1,float16,float16,0,0.30075732866923016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,0,1,fp8,fp8,0,0.36813334623972577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,0,1,float16,float16,0,0.3535999854405721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,128,1,float16,fp8,0,0.30054400364557904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,128,1,fp8,fp8,0,0.27826132376988727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,0,1,float16,fp8,0,0.35493866602579754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,0,1,fp8,fp8,0,0.32549866040547687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,128,1,float16,float16,0,0.3023359974225362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,0,1,float16,float16,0,0.3556586503982544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,128,1,float16,fp8,0,0.3025173346201579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,128,1,fp8,fp8,0,0.2825973431269328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,0,1,float16,fp8,0,0.3558080196380615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,0,1,fp8,fp8,0,0.3315040071805318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,128,1,float16,float16,0,0.3024746576944987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,128,1,float16,fp8,0,0.3022720019022624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,0,1,float16,float16,0,0.3556533257166545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,128,1,fp8,fp8,0,0.28302399317423504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,0,1,float16,fp8,0,0.3557013273239136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,0,1,fp8,fp8,0,0.33262399832407635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,128,1,float16,float16,0,0.173962672551473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,0,1,float16,float16,0,0.20468266805013022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,128,1,float16,fp8,0,0.17306667566299438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,128,1,fp8,fp8,0,0.1698933243751526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,128,1,float16,float16,0,0.15915733575820923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,0,1,float16,fp8,0,0.20120533307393393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,0,1,fp8,fp8,0,0.19769599040349325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,0,1,float16,float16,0,0.18747733036677042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,128,1,fp8,fp8,0,0.14922666549682617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,128,1,float16,fp8,0,0.1591039995352427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,0,1,fp8,fp8,0,0.17433599630991617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,128,1,float16,float16,0,0.1592586636543274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,0,1,float16,fp8,0,0.18794133265813193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,128,1,float16,fp8,0,0.15784000356992087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,128,1,fp8,fp8,0,0.15043200055758157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,0,1,float16,float16,0,0.18757865826288858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,0,1,fp8,fp8,0,0.17613865931828818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,128,1,float16,float16,0,0.15998400251070657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,0,1,float16,fp8,0,0.18689600626627603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,128,1,float16,fp8,0,0.16011200348536173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,128,1,fp8,fp8,0,0.15288000305493674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,0,1,float16,float16,0,0.1877866586049398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,128,1,float16,float16,0,0.09963200489679973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,0,1,fp8,fp8,0,0.17851734161376953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,0,1,float16,fp8,0,0.18948266903559366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,0,1,float16,float16,0,0.11691199739774068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,128,1,float16,fp8,0,0.09799466530481975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,128,1,fp8,fp8,0,0.10073066751162212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,0,1,float16,fp8,0,0.1157919963200887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,0,1,fp8,fp8,0,0.11553066968917847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,128,1,float16,float16,0,0.08962133526802063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,0,1,float16,float16,0,0.10652266939481099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,128,1,float16,fp8,0,0.08984000484148662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,128,1,fp8,fp8,0,0.08170666793982188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,0,1,float16,fp8,0,0.10634133219718933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,0,1,fp8,fp8,0,0.09701866904894511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,128,1,float16,float16,0,0.0885813335577647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,0,1,float16,float16,0,0.10595200459162395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,128,1,float16,fp8,0,0.08913066983222961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,128,1,fp8,fp8,0,0.0830080012480418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,0,1,float16,fp8,0,0.10607999563217163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,0,1,fp8,fp8,0,0.09912000099817912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,128,1,float16,float16,0,0.08896000186602275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,0,1,float16,float16,0,0.10641066233317058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,128,1,float16,fp8,0,0.0900426705678304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,128,1,fp8,fp8,0,0.0842026670773824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,0,1,float16,fp8,0,0.10657067100207011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,128,1,float16,float16,0,0.056218668818473816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,0,1,fp8,fp8,0,0.09800533453623454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,0,1,float16,float16,0,0.06656533479690552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,128,1,float16,fp8,0,0.05620799958705902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,128,1,fp8,fp8,0,0.05381333331267039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,0,1,float16,fp8,0,0.06608533362547557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,0,1,fp8,fp8,0,0.06432533264160156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,128,1,float16,float16,0,0.054144000013669334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,0,1,float16,float16,0,0.06348800162474315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,128,1,float16,fp8,0,0.05385600030422211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,128,1,fp8,fp8,0,0.05036800106366476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,0,1,float16,fp8,0,0.06426666676998138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,0,1,fp8,fp8,0,0.06054399907588959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,128,1,float16,float16,0,0.05306133131186167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,0,1,float16,float16,0,0.0641653339068095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,128,1,float16,fp8,0,0.054192001620928444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,128,1,fp8,fp8,0,0.051962668697039284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,0,1,float16,fp8,0,0.06447466711203258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,0,1,fp8,fp8,0,0.06039466460545858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,128,1,float16,float16,0,0.053674668073654175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,0,1,float16,float16,0,0.06414933502674103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,128,1,float16,fp8,0,0.05376533170541128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,128,1,fp8,fp8,0,0.05167999863624573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,0,1,float16,fp8,0,0.06449066599210103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,0,1,fp8,fp8,0,0.060271998246510826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,128,1,fp8,fp8,0,4.1273759206136065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,128,1,float16,float16,0,4.374608039855957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,128,1,float16,fp8,0,4.3753814697265625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,0,1,float16,float16,0,4.445061365763347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,0,1,fp8,fp8,0,4.171984036763509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,0,1,float16,fp8,0,4.429354667663574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,128,1,float16,float16,0,4.4455413818359375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,0,1,float16,float16,0,4.5081227620442705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,128,1,float16,fp8,0,4.4317121505737305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,128,1,fp8,fp8,0,4.4477386474609375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,0,1,float16,fp8,0,4.490895907084147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,0,1,fp8,fp8,0,4.47437858581543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,128,1,float16,float16,0,4.514277458190918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,0,1,float16,float16,0,4.596672058105469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,128,1,fp8,fp8,0,4.415082613627116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,128,1,float16,fp8,0,4.499642690022786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,128,1,float16,float16,0,2.512117385864258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,0,1,float16,fp8,0,4.566037178039551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,0,1,fp8,fp8,0,4.4975840250651045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,0,1,float16,float16,0,2.5586026509602866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,128,1,fp8,fp8,0,2.3692639668782554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,128,1,float16,fp8,0,2.4459306399027505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,0,1,float16,fp8,0,2.4777654012044272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,128,1,float16,float16,0,2.190687974294027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,0,1,float16,float16,0,2.2294294039408364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,0,1,fp8,fp8,0,2.4000320434570312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,128,1,float16,fp8,0,2.1917707125345864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,128,1,fp8,fp8,0,2.062293370564779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,0,1,fp8,fp8,0,2.0864906311035156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,0,1,float16,fp8,0,2.2228479385375977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,128,1,float16,float16,0,2.2030399640401206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,0,1,float16,float16,0,2.23691193262736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,128,1,float16,fp8,0,2.206261316935221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,128,1,fp8,fp8,0,2.2005813916524253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,0,1,float16,fp8,0,2.2296694119771323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,0,1,fp8,fp8,0,2.226352055867513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,128,1,float16,float16,0,2.214362621307373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,0,1,float16,float16,0,2.2434186935424805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,128,1,float16,fp8,0,2.1954026222229004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,128,1,fp8,fp8,0,2.198618729909261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,128,1,float16,float16,0,1.1985013484954834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,0,1,float16,fp8,0,2.2349546750386557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,0,1,fp8,fp8,0,2.209328015645345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,0,1,float16,float16,0,1.2204373677571614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,128,1,float16,fp8,0,1.1740480264027913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,128,1,fp8,fp8,0,1.1887679894765217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,0,1,float16,fp8,0,1.2107679843902588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,0,1,fp8,fp8,0,1.2009066740671794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,128,1,float16,float16,0,1.1101919809977214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,0,1,float16,float16,0,1.1188639799753826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,128,1,float16,fp8,0,1.1068693002065022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,128,1,fp8,fp8,0,1.0191306273142497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,0,1,float16,fp8,0,1.1191733678181965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,0,1,fp8,fp8,0,1.0288586616516113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,128,1,float16,float16,0,1.1123039722442627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,0,1,float16,float16,0,1.128223975499471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,128,1,fp8,fp8,0,1.0734986464182537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,128,1,float16,fp8,0,1.1110506852467854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,0,1,float16,fp8,0,1.1278986930847168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,0,1,fp8,fp8,0,1.0784800052642822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,0,1,float16,float16,0,1.128655989964803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,128,1,float16,float16,0,1.1097599665323894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,128,1,float16,fp8,0,1.109125296274821
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,128,1,fp8,fp8,0,1.0543786684672039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,0,1,fp8,fp8,0,1.0603466828664143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,128,1,float16,float16,0,0.6104746659596761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,0,1,float16,fp8,0,1.1239466667175293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,0,1,float16,float16,0,0.6194560130437216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,128,1,float16,fp8,0,0.595690647761027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,128,1,fp8,fp8,0,0.5981493393580118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,0,1,float16,fp8,0,0.6095306475957235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,0,1,fp8,fp8,0,0.6050879955291748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,128,1,float16,float16,0,0.5639199813206991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,0,1,float16,float16,0,0.5730453332265218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,128,1,float16,fp8,0,0.563098669052124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,128,1,fp8,fp8,0,0.5171519915262858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,0,1,fp8,fp8,0,0.5229386488596598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,0,1,float16,fp8,0,0.5702879826227824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,128,1,float16,float16,0,0.566869338353475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,0,1,float16,float16,0,0.5741120179494222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,128,1,fp8,fp8,0,0.525269349416097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,128,1,float16,fp8,0,0.5659840106964111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,0,1,float16,fp8,0,0.5741973320643107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,0,1,fp8,fp8,0,0.5334506829579672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,128,1,float16,float16,0,0.5668479998906454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,0,1,float16,float16,0,0.5743573506673177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,128,1,float16,fp8,0,0.5661333401997884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,128,1,float16,float16,0,0.3184693257013957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,128,1,fp8,fp8,0,0.5289386510848999
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,0,1,float16,fp8,0,0.5720106760660807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,0,1,fp8,fp8,0,0.5359626611073812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,0,1,float16,float16,0,0.32389867305755615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,128,1,float16,fp8,0,0.3116106589635213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,128,1,fp8,fp8,0,0.31160000960032147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,0,1,float16,fp8,0,0.318997323513031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,0,1,fp8,fp8,0,0.3146666685740153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,128,1,float16,float16,0,0.29171733061472577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,0,1,float16,float16,0,0.29647467533747357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,128,1,float16,fp8,0,0.2932533423105876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,128,1,fp8,fp8,0,0.2706666588783264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,0,1,float16,fp8,0,0.2958773374557495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,0,1,fp8,fp8,0,0.27261332670847577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,128,1,float16,float16,0,0.2937493324279785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,0,1,float16,float16,0,0.29653332630793255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,128,1,float16,fp8,0,0.29177600145339966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,128,1,fp8,fp8,0,0.2742080092430115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,0,1,float16,fp8,0,0.29803200562795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,0,1,fp8,fp8,0,0.2770826617876689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,128,1,float16,float16,0,0.2951680024464925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,0,1,float16,float16,0,0.2975626587867737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,128,1,float16,fp8,0,0.29392000039418537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,128,1,fp8,fp8,0,0.2754986683527629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,128,1,float16,float16,0,0.17084266742070517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,0,1,float16,fp8,0,0.29783467451731366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,0,1,fp8,fp8,0,0.2776906689008077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,0,1,float16,float16,0,0.17281599839528403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,128,1,float16,fp8,0,0.1674506664276123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,128,1,fp8,fp8,0,0.16690133015314737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,0,1,float16,fp8,0,0.16967467466990152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,128,1,float16,float16,0,0.15505599975585938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,0,1,fp8,fp8,0,0.16910932461420694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,0,1,float16,float16,0,0.15589333573977152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,128,1,float16,fp8,0,0.15577066938082376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,128,1,fp8,fp8,0,0.14454399545987448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,0,1,float16,fp8,0,0.1564853290716807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,0,1,fp8,fp8,0,0.1463466684023539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,128,1,float16,float16,0,0.15457066893577576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,0,1,float16,float16,0,0.1567626694838206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,128,1,float16,fp8,0,0.15386666854222616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,128,1,fp8,fp8,0,0.14642666776974997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,0,1,float16,fp8,0,0.15748266379038492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,0,1,fp8,fp8,0,0.1476479967435201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,128,1,float16,float16,0,0.15632533033688864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,0,1,float16,float16,0,0.1585493286450704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,128,1,float16,fp8,0,0.1557813286781311
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,128,1,fp8,fp8,0,0.14841600259145102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,0,1,float16,fp8,0,0.15677332878112793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,128,1,float16,float16,0,0.09715732932090759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,0,1,float16,float16,0,0.09738666812578838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,0,1,fp8,fp8,0,0.14947199821472168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,128,1,float16,fp8,0,0.09554133812586467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,0,1,float16,fp8,0,0.09611733754475911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,128,1,fp8,fp8,0,0.09796800216039021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,0,1,fp8,fp8,0,0.09770666559537251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,128,1,float16,float16,0,0.08708266417185466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,0,1,float16,float16,0,0.08753066261609395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,128,1,float16,fp8,0,0.08681066830952962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,128,1,fp8,fp8,0,0.08096533517042796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,0,1,float16,fp8,0,0.08706667025883992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,0,1,fp8,fp8,0,0.08137600123882294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,128,1,float16,float16,0,0.08699199557304382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,0,1,float16,float16,0,0.08700799942016602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,128,1,float16,fp8,0,0.08759466807047527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,128,1,fp8,fp8,0,0.08201600114504497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,0,1,float16,fp8,0,0.08709333340326945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,0,1,fp8,fp8,0,0.08130666613578796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,128,1,float16,float16,0,0.08769599596659343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,0,1,float16,float16,0,0.08685333530108134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,128,1,float16,fp8,0,0.08725866675376892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,128,1,fp8,fp8,0,0.08261333405971527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,0,1,float16,fp8,0,0.08769067128499348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,0,1,fp8,fp8,0,0.08169066905975342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,128,1,float16,float16,0,0.05503466725349426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,0,1,float16,float16,0,0.05605866511662801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,128,1,float16,fp8,0,0.054602667689323425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,128,1,fp8,fp8,0,0.052442664901415505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,0,1,float16,fp8,0,0.05618133147557577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,0,1,fp8,fp8,0,0.05454400181770325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,128,1,float16,float16,0,0.0521066685517629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,0,1,float16,float16,0,0.054042667150497437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,128,1,float16,fp8,0,0.0521066685517629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,128,1,fp8,fp8,0,0.05004266897837321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,0,1,float16,fp8,0,0.05343999962011973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,0,1,fp8,fp8,0,0.0498879998922348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,128,1,float16,float16,0,0.05340266724427541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,0,1,float16,float16,0,0.05208533505598704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,128,1,float16,fp8,0,0.05341866612434387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,128,1,fp8,fp8,0,0.050144001841545105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,0,1,float16,fp8,0,0.052842666705449425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,0,1,fp8,fp8,0,0.05065066615740458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,128,1,float16,float16,0,0.052111998200416565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,0,1,float16,float16,0,0.05208533505598704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,128,1,float16,fp8,0,0.051925331354141235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,128,1,fp8,fp8,0,0.04970133304595947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,0,1,float16,fp8,0,0.054117331902186074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,0,1,fp8,fp8,0,0.05006400247414907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,128,1,float16,float16,0,0.03668266783157984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,0,1,float16,float16,0,0.03754133234421412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,128,1,float16,fp8,0,0.037461332976818085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,128,1,fp8,fp8,0,0.03554133325815201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,0,1,float16,fp8,0,0.03700266778469086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,0,1,fp8,fp8,0,0.03579733272393545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,128,1,float16,float16,0,0.036650667587916054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,0,1,float16,float16,0,0.035802667339642845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,128,1,float16,fp8,0,0.03573866685231527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,128,1,fp8,fp8,0,0.03472000112136205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,0,1,float16,fp8,0,0.03603733330965042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,0,1,fp8,fp8,0,0.03536533315976461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,128,1,float16,float16,0,0.03549866626660029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,0,1,float16,float16,0,0.03572266548871994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,128,1,float16,fp8,0,0.035461333890755974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,128,1,fp8,fp8,0,0.03375466664632162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,0,1,float16,fp8,0,0.03606399893760681
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,0,1,fp8,fp8,0,0.03366400053103765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,128,1,float16,float16,0,0.035786665976047516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,0,1,float16,float16,0,0.035818666219711304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,128,1,float16,fp8,0,0.03579200059175491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,128,1,fp8,fp8,0,0.03533866753180822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,0,1,float16,fp8,0,0.03554133325815201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,0,1,fp8,fp8,0,0.0334346666932106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,0,1,float16,float16,0,2.0134827295939126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,128,1,float16,float16,0,2.054330666859945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,128,1,float16,fp8,0,2.0490612983703613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,128,1,fp8,fp8,0,1.8937333424886067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,0,1,fp8,fp8,0,1.8447413444519043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,0,1,float16,fp8,0,2.002602736155192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,128,1,float16,float16,0,2.0500853856404624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,0,1,float16,float16,0,2.006453355153402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,128,1,float16,fp8,0,2.0435412724812827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,128,1,fp8,fp8,0,2.012714703877767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,0,1,float16,fp8,0,2.0006772677103677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,0,1,fp8,fp8,0,1.9826614061991374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,128,1,float16,float16,0,2.065098603566488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,0,1,float16,float16,0,2.041423956553141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,128,1,float16,fp8,0,2.077792008717855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,128,1,fp8,fp8,0,2.0187039375305176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,0,1,float16,fp8,0,2.0172266960144043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,128,1,float16,float16,0,1.1224853197733562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,0,1,float16,float16,0,1.1022133032480876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,0,1,fp8,fp8,0,1.9652160008748372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,128,1,float16,fp8,0,1.1077653566996257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,128,1,fp8,fp8,0,1.1171519756317139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,0,1,float16,fp8,0,1.0881333351135254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,128,1,float16,float16,0,1.0321119626363118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,0,1,fp8,fp8,0,1.0809600353240967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,0,1,float16,float16,0,1.0152053038279216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,128,1,float16,fp8,0,1.0307733217875164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,128,1,fp8,fp8,0,0.9404053688049316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,0,1,float16,fp8,0,1.0142133235931396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,0,1,fp8,fp8,0,0.9137653509775797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,128,1,float16,float16,0,1.0318506558736165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,0,1,float16,float16,0,1.0096159776051838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,128,1,float16,fp8,0,1.0323786735534668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,128,1,fp8,fp8,0,0.982426643371582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,0,1,float16,fp8,0,1.0097920099894206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,0,1,fp8,fp8,0,0.9831519921620687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,128,1,float16,float16,0,1.0347572962443035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,0,1,float16,float16,0,1.0117706457773845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,128,1,fp8,fp8,0,0.9826400279998779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,128,1,float16,fp8,0,1.0284159978230794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,0,1,float16,fp8,0,1.010970671971639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,0,1,fp8,fp8,0,0.9581440289815267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,128,1,float16,float16,0,0.5707199970881144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,0,1,float16,float16,0,0.559008002281189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,128,1,float16,fp8,0,0.5586239894231161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,128,1,fp8,fp8,0,0.5621013243993124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,0,1,float16,fp8,0,0.5497493346532186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,0,1,fp8,fp8,0,0.5497599840164185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,128,1,float16,float16,0,0.5289599895477295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,0,1,float16,float16,0,0.5159733295440674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,128,1,fp8,fp8,0,0.4789760112762451
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,128,1,float16,fp8,0,0.5244853496551514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,0,1,float16,fp8,0,0.5154079993565878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,0,1,fp8,fp8,0,0.4673440059026082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,128,1,float16,float16,0,0.5251733462015787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,0,1,float16,float16,0,0.5146826505661011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,128,1,float16,fp8,0,0.5248053471247355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,128,1,fp8,fp8,0,0.4893333514531453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,0,1,fp8,fp8,0,0.47990934054056805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,0,1,float16,fp8,0,0.5135733286539713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,128,1,float16,float16,0,0.5272853374481201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,0,1,float16,float16,0,0.5154026746749878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,128,1,fp8,fp8,0,0.4938453435897827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,128,1,float16,fp8,0,0.5264799992243449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,0,1,float16,fp8,0,0.516325314839681
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,128,1,float16,float16,0,0.29789867003758747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,0,1,float16,float16,0,0.29313600063323975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,0,1,fp8,fp8,0,0.4797439972559611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,128,1,float16,fp8,0,0.29225067297617596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,0,1,float16,fp8,0,0.28647466500600177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,128,1,fp8,fp8,0,0.2935840090115865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,0,1,fp8,fp8,0,0.28672534227371216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,128,1,float16,float16,0,0.2746346592903137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,0,1,float16,float16,0,0.267685333887736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,128,1,float16,fp8,0,0.27292267481486004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,128,1,fp8,fp8,0,0.25067732731501263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,0,1,float16,fp8,0,0.26869867245356244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,0,1,fp8,fp8,0,0.24456000328063965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,128,1,float16,float16,0,0.2733333309491475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,0,1,float16,float16,0,0.26863465706507367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,128,1,float16,fp8,0,0.273306667804718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,128,1,fp8,fp8,0,0.25552000602086383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,0,1,float16,fp8,0,0.2672800024350484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,0,1,fp8,fp8,0,0.2497546672821045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,128,1,float16,float16,0,0.2753173311551412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,0,1,float16,float16,0,0.2687893311182658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,128,1,float16,fp8,0,0.2743946711222331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,128,1,fp8,fp8,0,0.2560746669769287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,0,1,float16,fp8,0,0.2693386673927307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,128,1,float16,float16,0,0.16063466668128967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,0,1,float16,float16,0,0.15637333194414774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,0,1,fp8,fp8,0,0.24955199162165323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,128,1,float16,fp8,0,0.1574720044930776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,128,1,fp8,fp8,0,0.15871999661127725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,0,1,float16,fp8,0,0.1562346617380778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,0,1,fp8,fp8,0,0.15458133816719055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,128,1,float16,float16,0,0.1457919975121816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,0,1,float16,float16,0,0.14219733079274496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,128,1,float16,fp8,0,0.14670399824778238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,128,1,fp8,fp8,0,0.13447999954223633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,0,1,float16,fp8,0,0.1429333289464315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,0,1,fp8,fp8,0,0.1321440041065216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,128,1,float16,float16,0,0.14652799566586813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,0,1,float16,float16,0,0.14350400368372598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,128,1,float16,fp8,0,0.1469386617342631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,128,1,fp8,fp8,0,0.13660800457000732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,0,1,float16,fp8,0,0.14402133226394653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,0,1,fp8,fp8,0,0.13385066390037537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,128,1,float16,float16,0,0.1477013329664866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,0,1,float16,float16,0,0.14386666814486185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,128,1,float16,fp8,0,0.14627733826637268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,128,1,fp8,fp8,0,0.13917866349220276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,0,1,float16,fp8,0,0.14390933513641357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,0,1,fp8,fp8,0,0.13428266843159994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,128,1,float16,float16,0,0.09139200051625569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,0,1,float16,float16,0,0.09082133571306865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,128,1,float16,fp8,0,0.09115733702977498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,128,1,fp8,fp8,0,0.09361066420873006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,0,1,float16,fp8,0,0.08960533142089844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,0,1,fp8,fp8,0,0.091839998960495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,128,1,float16,float16,0,0.08342400193214417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,0,1,float16,float16,0,0.08081066608428955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,128,1,float16,fp8,0,0.08408000071843465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,128,1,fp8,fp8,0,0.07692799965540568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,0,1,fp8,fp8,0,0.07579733431339264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,0,1,float16,fp8,0,0.08112533390522003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,128,1,float16,float16,0,0.08277866741021474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,0,1,float16,float16,0,0.08082666496435802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,128,1,float16,fp8,0,0.08297599852085114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,128,1,fp8,fp8,0,0.0768746683994929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,0,1,float16,fp8,0,0.08067733546098073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,0,1,fp8,fp8,0,0.07529066503047943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,128,1,float16,float16,0,0.0830506682395935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,0,1,float16,float16,0,0.08061866462230682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,128,1,float16,fp8,0,0.08401599526405334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,128,1,fp8,fp8,0,0.07691200077533722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,0,1,float16,fp8,0,0.08053333560625713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,0,1,fp8,fp8,0,0.07666133344173431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,128,1,float16,float16,0,0.05120000243186951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,0,1,float16,float16,0,0.050250664353370667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,128,1,float16,fp8,0,0.05076266825199127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,128,1,fp8,fp8,0,0.051455999414126076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,0,1,float16,fp8,0,0.05031466484069824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,0,1,fp8,fp8,0,0.0480320006608963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,128,1,float16,float16,0,0.049770668148994446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,0,1,float16,float16,0,0.04794133206208547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,128,1,float16,fp8,0,0.050111999114354454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,128,1,fp8,fp8,0,0.04640533526738485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,0,1,float16,fp8,0,0.048026666045188904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,0,1,fp8,fp8,0,0.04572266836961111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,128,1,float16,float16,0,0.0498879998922348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,0,1,float16,float16,0,0.04772266745567322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,128,1,float16,fp8,0,0.04977599779764811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,128,1,fp8,fp8,0,0.045968001087506614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,0,1,float16,fp8,0,0.04788800080617269
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,0,1,fp8,fp8,0,0.046037331223487854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,128,1,float16,float16,0,0.048613334695498146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,0,1,float16,float16,0,0.047872001926104225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,128,1,float16,fp8,0,0.04891733328501383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,128,1,fp8,fp8,0,0.04746133089065552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,0,1,float16,fp8,0,0.047968000173568726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,0,1,fp8,fp8,0,0.045706664522488914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,128,1,float16,float16,0,0.035232000052928925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,0,1,float16,float16,0,0.03349866718053818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,128,1,float16,fp8,0,0.034330666065216064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,128,1,fp8,fp8,0,0.033402666449546814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,0,1,float16,fp8,0,0.033701332906881966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,0,1,fp8,fp8,0,0.03342933456103007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,128,1,float16,float16,0,0.033589333295822144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,0,1,float16,float16,0,0.03345066557327906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,128,1,float16,fp8,0,0.035930665830771126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,128,1,fp8,fp8,0,0.0315786674618721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,0,1,float16,fp8,0,0.03344533344109853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,0,1,fp8,fp8,0,0.03133866687615713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,128,1,float16,float16,0,0.033786666889985405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,0,1,float16,float16,0,0.03344533344109853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,128,1,float16,fp8,0,0.03346666693687439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,128,1,fp8,fp8,0,0.03350399931271871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,0,1,float16,fp8,0,0.03331200033426285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,0,1,fp8,fp8,0,0.03165333221356074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,128,1,float16,float16,0,0.03350399931271871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,0,1,float16,float16,0,0.033402666449546814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,128,1,float16,fp8,0,0.03383466601371765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,128,1,fp8,fp8,0,0.03319466610749563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,0,1,float16,fp8,0,0.033439998825391136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,0,1,fp8,fp8,0,0.031685332457224526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,128,1,float16,float16,0,0.02500266581773758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,128,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,128,1,fp8,fp8,0,0.023658665517965954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,0,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,0,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,128,1,float16,float16,0,0.023397333920001984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,0,1,float16,float16,0,0.023317334552605946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,128,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,128,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,0,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,0,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,0,1,float16,float16,0,0.02329600105683009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,128,1,float16,float16,0,0.023386667172114056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,128,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,128,1,fp8,fp8,0,0.02348266790310542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,0,1,float16,fp8,0,0.023333333432674408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,0,1,fp8,fp8,0,0.023013333479563396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,0,1,float16,float16,0,0.023344000180562336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,128,1,float16,fp8,0,0.025237334271272022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,128,1,fp8,fp8,0,0.023071999351183575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,0,1,float16,fp8,0,0.023818666736284893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,0,1,fp8,fp8,0,0.023285334308942158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,128,1,float16,float16,0,1.0829493204752605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,128,1,float16,fp8,0,1.0776106516520183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,0,1,float16,float16,0,1.0811786651611328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,128,1,fp8,fp8,0,1.002458651860555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,0,1,float16,fp8,0,1.0760266780853271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,0,1,fp8,fp8,0,1.0041013558705647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,128,1,float16,float16,0,1.0815359751383464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,0,1,float16,float16,0,1.0812853177388508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,128,1,float16,fp8,0,1.0766133467356365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,128,1,fp8,fp8,0,1.0743306477864583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,0,1,float16,fp8,0,1.0757173697153728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,0,1,fp8,fp8,0,1.08023468653361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,128,1,float16,float16,0,1.0844799677530925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,0,1,float16,float16,0,1.0863733291625977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,128,1,float16,fp8,0,1.083674669265747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,128,1,float16,float16,0,0.5976693232854208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,128,1,fp8,fp8,0,1.0733599662780762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,0,1,fp8,fp8,0,1.083568016688029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,0,1,float16,fp8,0,1.0828159650166829
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,128,1,float16,fp8,0,0.5858933528264364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,0,1,float16,float16,0,0.5980746746063232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,128,1,fp8,fp8,0,0.5910346508026123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,0,1,float16,fp8,0,0.5872906843821207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,128,1,float16,float16,0,0.5486026604970297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,0,1,fp8,fp8,0,0.5910293261210123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,0,1,float16,float16,0,0.5482399861017863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,128,1,float16,fp8,0,0.5476640065511068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,128,1,fp8,fp8,0,0.5093760093053182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,0,1,float16,fp8,0,0.5476586818695068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,0,1,fp8,fp8,0,0.5097813208897909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,128,1,float16,float16,0,0.5498400131861368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,0,1,float16,float16,0,0.5476213296254476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,128,1,float16,fp8,0,0.5470559994379679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,128,1,fp8,fp8,0,0.5218293269475301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,0,1,float16,fp8,0,0.5470399856567383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,0,1,fp8,fp8,0,0.5230026642481486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,128,1,float16,float16,0,0.5508106549580892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,0,1,float16,float16,0,0.5517333348592123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,128,1,float16,fp8,0,0.5516320069630941
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,128,1,fp8,fp8,0,0.5256373484929403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,0,1,float16,fp8,0,0.5501226584116617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,0,1,fp8,fp8,0,0.5253440141677856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,128,1,float16,float16,0,0.31033066908518475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,0,1,float16,float16,0,0.310261329015096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,128,1,fp8,fp8,0,0.3046240011850993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,128,1,float16,fp8,0,0.30425065755844116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,0,1,float16,fp8,0,0.303765336672465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,0,1,fp8,fp8,0,0.30564266443252563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,128,1,float16,float16,0,0.28392000993092853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,0,1,float16,float16,0,0.2847200036048889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,128,1,float16,fp8,0,0.2846933404604594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,128,1,fp8,fp8,0,0.2644373377164205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,0,1,float16,fp8,0,0.2840213378270467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,0,1,fp8,fp8,0,0.26524800062179565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,128,1,float16,float16,0,0.28434133529663086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,0,1,float16,float16,0,0.28438933690388996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,128,1,float16,fp8,0,0.282042662302653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,128,1,fp8,fp8,0,0.2690933346748352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,0,1,float16,fp8,0,0.2842719952265422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,0,1,fp8,fp8,0,0.26897599299748737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,128,1,float16,float16,0,0.2834720015525818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,0,1,float16,float16,0,0.284277339776357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,128,1,fp8,fp8,0,0.2706560095151265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,128,1,float16,fp8,0,0.2846826712290446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,0,1,fp8,fp8,0,0.2715253432591756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,0,1,float16,fp8,0,0.2829013268152873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,128,1,float16,float16,0,0.16501866777737936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,0,1,float16,float16,0,0.16296533743540445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,128,1,float16,fp8,0,0.1613759994506836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,128,1,fp8,fp8,0,0.16331733266512552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,0,1,float16,fp8,0,0.16107733050982156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,0,1,fp8,fp8,0,0.16390933593114218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,128,1,float16,float16,0,0.1509119967619578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,0,1,float16,float16,0,0.15093333522478738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,128,1,float16,fp8,0,0.15027733643849692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,128,1,fp8,fp8,0,0.14205333590507507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,0,1,float16,fp8,0,0.15200000007947287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,0,1,fp8,fp8,0,0.1409119963645935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,128,1,float16,float16,0,0.15018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,0,1,float16,float16,0,0.15147733688354492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,128,1,float16,fp8,0,0.15228266517321268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,128,1,fp8,fp8,0,0.14315733313560486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,0,1,float16,fp8,0,0.15019733707110086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,0,1,fp8,fp8,0,0.14262400070826212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,128,1,float16,float16,0,0.15253333250681558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,0,1,float16,float16,0,0.15107733011245728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,128,1,float16,fp8,0,0.15074666341145834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,128,1,fp8,fp8,0,0.14414933323860168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,0,1,float16,fp8,0,0.1511413355668386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,0,1,fp8,fp8,0,0.14390400052070618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,128,1,float16,float16,0,0.0930613378683726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,0,1,float16,float16,0,0.09341333309809367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,128,1,float16,fp8,0,0.0918986697991689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,128,1,fp8,fp8,0,0.09513599673906963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,0,1,float16,fp8,0,0.09128000338872273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,0,1,fp8,fp8,0,0.09658132990201314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,128,1,float16,float16,0,0.08509332935015361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,0,1,float16,float16,0,0.08417600393295288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,128,1,float16,fp8,0,0.0851200024286906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,128,1,fp8,fp8,0,0.08057066798210144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,0,1,float16,fp8,0,0.08492799599965413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,0,1,fp8,fp8,0,0.08052266637484233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,128,1,float16,float16,0,0.08493866523106892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,0,1,float16,float16,0,0.08462400237719218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,128,1,float16,fp8,0,0.08432533343633015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,128,1,fp8,fp8,0,0.07901333272457123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,0,1,float16,fp8,0,0.0848426620165507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,0,1,fp8,fp8,0,0.07921066880226135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,128,1,float16,float16,0,0.08470400174458821
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,0,1,float16,float16,0,0.08379200100898743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,128,1,float16,fp8,0,0.08480532964070638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,128,1,fp8,fp8,0,0.08027199904123943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,0,1,float16,fp8,0,0.08472533027331035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,0,1,fp8,fp8,0,0.08012799918651581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,128,1,float16,float16,0,0.05417599777380625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,0,1,float16,float16,0,0.05357333521048228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,128,1,float16,fp8,0,0.053690666953722634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,0,1,float16,fp8,0,0.05309866865475973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,128,1,fp8,fp8,0,0.053344001372655235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,0,1,fp8,fp8,0,0.052570665876070656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,128,1,float16,float16,0,0.051130667328834534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,0,1,float16,float16,0,0.0516480008761088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,128,1,float16,fp8,0,0.05022400120894114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,128,1,fp8,fp8,0,0.04782933493455251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,0,1,float16,fp8,0,0.05180266499519348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,0,1,fp8,fp8,0,0.0480373352766037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,128,1,float16,float16,0,0.049829334020614624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,0,1,float16,float16,0,0.050053333242734276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,128,1,float16,fp8,0,0.051813334226608276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,128,1,fp8,fp8,0,0.047770669062932335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,0,1,float16,fp8,0,0.05017066498597463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,0,1,fp8,fp8,0,0.04929600159327189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,128,1,float16,float16,0,0.05134933193524679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,0,1,float16,float16,0,0.049925332268079124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,128,1,float16,fp8,0,0.05179733534653982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,128,1,fp8,fp8,0,0.04888000090916952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,0,1,float16,fp8,0,0.050245334704717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,0,1,fp8,fp8,0,0.049914668003718056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,128,1,float16,float16,0,0.03268266717592875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,0,1,float16,float16,0,0.03365333378314972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,128,1,float16,fp8,0,0.03341866781314214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,128,1,fp8,fp8,0,0.03142400085926056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,0,1,float16,fp8,0,0.03347733368476232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,0,1,fp8,fp8,0,0.03252800057331721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,128,1,float16,float16,0,0.031514666974544525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,0,1,float16,float16,0,0.03126399964094162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,128,1,float16,fp8,0,0.031717332700888314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,128,1,fp8,fp8,0,0.029306667546431225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,0,1,float16,fp8,0,0.03161599983771642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,0,1,fp8,fp8,0,0.031162666777769726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,128,1,float16,float16,0,0.031658666829268135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,0,1,float16,float16,0,0.03133866687615713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,128,1,float16,fp8,0,0.031914666295051575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,128,1,fp8,fp8,0,0.03148266673088074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,0,1,float16,fp8,0,0.03154666721820831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,0,1,fp8,fp8,0,0.03162133445342382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,128,1,float16,float16,0,0.03125333289305369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,0,1,float16,float16,0,0.031680000325044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,128,1,float16,fp8,0,0.031504000226656594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,128,1,fp8,fp8,0,0.031199999153614044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,0,1,float16,fp8,0,0.03202133377393087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,0,1,fp8,fp8,0,0.03170666595300039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,128,1,float16,float16,0,0.025290665527184803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,0,1,float16,float16,0,0.025263999899228413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,128,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,128,1,fp8,fp8,0,0.025274666647116344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,0,1,float16,fp8,0,0.025237334271272022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,0,1,fp8,fp8,0,0.023973333338896435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,128,1,float16,float16,0,0.02518400053183238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,0,1,float16,float16,0,0.023152001202106476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,128,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,128,1,fp8,fp8,0,0.02348800003528595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,0,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,0,1,fp8,fp8,0,0.02407466620206833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,128,1,float16,float16,0,0.02346666653951009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,0,1,float16,float16,0,0.025237334271272022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,128,1,float16,fp8,0,0.023386667172114056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,128,1,fp8,fp8,0,0.02316266546646754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,0,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,128,1,float16,float16,0,0.023669332265853882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,0,1,fp8,fp8,0,0.023503998915354412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,0,1,float16,float16,0,0.025237334271272022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,128,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,128,1,fp8,fp8,0,0.023743999501069386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,0,1,float16,fp8,0,0.025205334027608235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,0,1,fp8,fp8,0,0.024080000817775726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,128,1,float16,float16,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,128,1,float16,fp8,0,0.01939733326435089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,128,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,0,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,128,1,float16,float16,0,0.018837332725524902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,0,1,float16,float16,0,0.01937599976857503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,128,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,0,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,128,1,float16,float16,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,128,1,float16,fp8,0,0.018810667097568512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,0,1,float16,float16,0,0.020245333512624104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,0,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,128,1,float16,float16,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,128,1,float16,fp8,0,0.018672000616788864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,128,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,0,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,0,1,fp8,fp8,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,128,1,float16,float16,0,0.759930690129598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,0,1,float16,float16,0,0.758997360865275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,128,1,float16,fp8,0,0.7575519879659017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,128,1,fp8,fp8,0,0.6936853726704916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,0,1,fp8,fp8,0,0.6944693724314371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,0,1,float16,fp8,0,0.7566773096720377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,128,1,float16,float16,0,0.7574506600697836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,0,1,float16,float16,0,0.7577706972757975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,128,1,float16,fp8,0,0.7546666463216146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,128,1,fp8,fp8,0,0.7064159711201986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,0,1,float16,fp8,0,0.756165345509847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,0,1,fp8,fp8,0,0.7065866788228353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,128,1,float16,float16,0,0.7577813466389974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,0,1,float16,float16,0,0.7616906960805258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,128,1,float16,fp8,0,0.7576053142547607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,128,1,fp8,fp8,0,0.7087626457214355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,0,1,fp8,fp8,0,0.7101120154062907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,0,1,float16,fp8,0,0.7582453091939291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,128,1,float16,float16,0,0.4118826786677043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,0,1,float16,float16,0,0.41205334663391113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,128,1,fp8,fp8,0,0.395466685295105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,128,1,float16,fp8,0,0.4073599974314372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,0,1,float16,fp8,0,0.4060853322347005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,0,1,fp8,fp8,0,0.3957226673762004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,128,1,float16,float16,0,0.3871786594390869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,0,1,float16,float16,0,0.38889066378275555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,128,1,float16,fp8,0,0.3885813156763713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,128,1,fp8,fp8,0,0.354751984278361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,0,1,float16,fp8,0,0.38809601465861004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,0,1,fp8,fp8,0,0.35572266578674316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,128,1,float16,float16,0,0.38712533315022785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,0,1,float16,float16,0,0.38634665807088214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,128,1,float16,fp8,0,0.38681598504384357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,128,1,fp8,fp8,0,0.3612320025761922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,0,1,float16,fp8,0,0.38526399930318195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,0,1,fp8,fp8,0,0.3601173162460327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,128,1,float16,float16,0,0.3880693515141805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,0,1,float16,float16,0,0.387824018796285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,128,1,float16,fp8,0,0.38714667161305744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,128,1,fp8,fp8,0,0.3622560103734334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,0,1,float16,fp8,0,0.3872479995091756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,128,1,float16,float16,0,0.2165279984474182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,0,1,float16,float16,0,0.21563732624053955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,0,1,fp8,fp8,0,0.36268798510233563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,128,1,float16,fp8,0,0.21262399355570474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,128,1,fp8,fp8,0,0.20863999923070273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,0,1,float16,fp8,0,0.2121386726697286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,0,1,fp8,fp8,0,0.20832000176111856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,128,1,float16,float16,0,0.20273067553838095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,0,1,float16,float16,0,0.2039626638094584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,128,1,float16,fp8,0,0.201690673828125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,128,1,fp8,fp8,0,0.18665599822998047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,0,1,float16,fp8,0,0.20382400353749594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,0,1,fp8,fp8,0,0.18702934185663858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,128,1,float16,float16,0,0.2032053271929423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,0,1,float16,float16,0,0.20204800367355347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,128,1,float16,fp8,0,0.2035413384437561
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,128,1,fp8,fp8,0,0.18758400281270346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,0,1,float16,fp8,0,0.20245865980784097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,0,1,fp8,fp8,0,0.18923733631769815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,128,1,float16,float16,0,0.20381333430608115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,0,1,float16,float16,0,0.20195200045903525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,128,1,float16,fp8,0,0.2025760014851888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,128,1,fp8,fp8,0,0.19000534216562906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,0,1,float16,fp8,0,0.2036906679471334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,0,1,fp8,fp8,0,0.18890666961669922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,128,1,float16,float16,0,0.1188106636206309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,0,1,float16,float16,0,0.11922132968902588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,128,1,float16,fp8,0,0.11684800187746684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,128,1,fp8,fp8,0,0.11623467008272807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,0,1,float16,fp8,0,0.11588799953460693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,128,1,float16,float16,0,0.1102293332417806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,0,1,fp8,fp8,0,0.11809066931406657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,0,1,float16,float16,0,0.11122666796048482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,128,1,float16,fp8,0,0.11017599701881409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,128,1,fp8,fp8,0,0.10214933753013611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,0,1,fp8,fp8,0,0.10223999619483948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,0,1,float16,fp8,0,0.10981333255767822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,128,1,float16,float16,0,0.10938133796056111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,0,1,float16,float16,0,0.10995733737945557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,128,1,float16,fp8,0,0.11097066601117452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,128,1,fp8,fp8,0,0.10133333007494609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,0,1,float16,fp8,0,0.1093280017375946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,0,1,fp8,fp8,0,0.10221866766611735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,128,1,float16,float16,0,0.11082667112350464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,0,1,float16,float16,0,0.11130133271217346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,128,1,float16,fp8,0,0.11044800281524658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,128,1,fp8,fp8,0,0.10293333729108174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,0,1,float16,fp8,0,0.1102133293946584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,0,1,fp8,fp8,0,0.10244266192118327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,128,1,float16,float16,0,0.0651039977868398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,0,1,float16,float16,0,0.06410133341948192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,128,1,float16,fp8,0,0.06517333288987477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,128,1,fp8,fp8,0,0.06467199822266896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,0,1,float16,fp8,0,0.06588799754778545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,0,1,fp8,fp8,0,0.06405866642793019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,0,1,float16,float16,0,0.06411733229955037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,128,1,float16,float16,0,0.06412800153096516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,128,1,float16,fp8,0,0.06239999830722809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,128,1,fp8,fp8,0,0.05964800218741099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,0,1,float16,fp8,0,0.06428800026575725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,0,1,fp8,fp8,0,0.05868266522884369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,128,1,float16,float16,0,0.062218666076660156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,0,1,float16,float16,0,0.06437866886456807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,128,1,float16,fp8,0,0.06409599880377452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,128,1,fp8,fp8,0,0.05961599946022034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,0,1,float16,fp8,0,0.06438399851322174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,0,1,fp8,fp8,0,0.0601440022389094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,128,1,float16,float16,0,0.06271466612815857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,0,1,float16,float16,0,0.06348266700903575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,128,1,float16,fp8,0,0.06267199913660686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,128,1,fp8,fp8,0,0.06033066908518473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,0,1,float16,fp8,0,0.06311999758084615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,0,1,fp8,fp8,0,0.05972800155480703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,128,1,float16,float16,0,0.041296000281969704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,0,1,float16,float16,0,0.039994666973749794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,128,1,float16,fp8,0,0.03968533376852671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,128,1,fp8,fp8,0,0.038986665507157646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,0,1,float16,fp8,0,0.04014399896065394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,0,1,fp8,fp8,0,0.039493332306543984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,128,1,float16,float16,0,0.03965866565704346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,0,1,float16,float16,0,0.03988266736268997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,128,1,float16,fp8,0,0.03973866750796636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,128,1,fp8,fp8,0,0.03805333375930786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,0,1,float16,fp8,0,0.040106666584809623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,0,1,fp8,fp8,0,0.03753600021203359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,128,1,float16,float16,0,0.0397173340121905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,0,1,float16,float16,0,0.03995733211437861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,128,1,float16,fp8,0,0.03952533255020777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,128,1,fp8,fp8,0,0.03770133356253306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,0,1,float16,fp8,0,0.04013866682847341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,0,1,fp8,fp8,0,0.03760000069936117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,128,1,float16,float16,0,0.03975466638803482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,0,1,float16,float16,0,0.03944533318281174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,128,1,float16,fp8,0,0.03962666789690653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,128,1,fp8,fp8,0,0.037461332976818085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,0,1,float16,fp8,0,0.03940266619126002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,0,1,fp8,fp8,0,0.03772266705830892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,128,1,float16,float16,0,0.027215999861558277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,0,1,float16,float16,0,0.02740799884001414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,128,1,float16,fp8,0,0.027274665733178455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,128,1,fp8,fp8,0,0.02740799884001414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,0,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,0,1,fp8,fp8,0,0.025466665625572205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,128,1,float16,float16,0,0.027263998985290527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,0,1,float16,float16,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,128,1,float16,fp8,0,0.027114666998386383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,128,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,0,1,float16,fp8,0,0.02641066660483678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,0,1,fp8,fp8,0,0.025226667523384094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,128,1,float16,float16,0,0.02532266577084859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,0,1,float16,float16,0,0.02550933261712392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,128,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,128,1,fp8,fp8,0,0.025285333395004272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,0,1,float16,fp8,0,0.027237333357334137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,0,1,fp8,fp8,0,0.025061334172884624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,128,1,float16,float16,0,0.025226667523384094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,0,1,float16,float16,0,0.027429332335789997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,128,1,float16,fp8,0,0.027162666122118633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,128,1,fp8,fp8,0,0.027269333600997925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,0,1,float16,fp8,0,0.027093333502610523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,0,1,fp8,fp8,0,0.026330667237440746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,128,1,float16,float16,0,0.021503999829292297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,0,1,float16,float16,0,0.021221332252025604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,128,1,float16,fp8,0,0.02146133283774058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,128,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,0,1,float16,fp8,0,0.021802666286627453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,0,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,128,1,float16,float16,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,0,1,float16,float16,0,0.021221332252025604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,128,1,float16,fp8,0,0.10890133182207744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,128,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,0,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,0,1,fp8,fp8,0,0.019632000476121902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,128,1,float16,float16,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,0,1,float16,float16,0,0.021301334102948506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,128,1,float16,fp8,0,0.020997333029905956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,128,1,fp8,fp8,0,0.020960000654061634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,0,1,float16,fp8,0,0.020997333029905956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,0,1,fp8,fp8,0,0.020117333779732387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,128,1,float16,float16,0,0.02094399929046631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,128,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,128,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,0,1,float16,fp8,0,0.021269333859284718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,0,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,128,1,float16,float16,0,0.0174346665541331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,0,1,float16,float16,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,128,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,0,1,float16,float16,0,0.01756799966096878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,128,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,128,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,0,1,float16,fp8,0,0.01758933315674464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,128,1,float16,float16,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,0,1,float16,float16,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,128,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,128,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,0,1,float16,fp8,0,0.017477333545684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,128,1,float16,float16,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,128,1,float16,fp8,0,0.018725333114465077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,128,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,0,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,128,1,float16,float16,0,0.6004906495412191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,0,1,float16,float16,0,0.6004319985707601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,128,1,float16,fp8,0,0.5990293423334757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,128,1,fp8,fp8,0,0.5409813324610392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,0,1,fp8,fp8,0,0.5412906805674235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,0,1,float16,fp8,0,0.5999733209609985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,128,1,float16,float16,0,0.5994720061620077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,0,1,float16,float16,0,0.5993599891662598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,128,1,float16,fp8,0,0.5986293156941732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,128,1,fp8,fp8,0,0.5459413528442383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,0,1,fp8,fp8,0,0.547322670618693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,0,1,float16,fp8,0,0.5976373354593912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,128,1,float16,float16,0,0.5998773177464803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,0,1,float16,float16,0,0.6007253328959147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,128,1,float16,fp8,0,0.5987093448638916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,128,1,fp8,fp8,0,0.5470879872639974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,0,1,fp8,fp8,0,0.5469599962234497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,0,1,float16,fp8,0,0.5991679827372233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,128,1,float16,float16,0,0.3203786611557007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,128,1,float16,fp8,0,0.3179733355840047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,0,1,float16,float16,0,0.31966932614644367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,0,1,float16,fp8,0,0.31706666946411133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,128,1,fp8,fp8,0,0.3009546597798665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,0,1,fp8,fp8,0,0.30052800973256427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,128,1,float16,float16,0,0.30751466751098633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,0,1,float16,float16,0,0.3081173300743103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,128,1,fp8,fp8,0,0.2789919972419739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,0,1,float16,fp8,0,0.30765867233276367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,128,1,float16,fp8,0,0.30873600641886395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,0,1,fp8,fp8,0,0.27874133984247845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,128,1,float16,float16,0,0.30908799171447754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,0,1,float16,float16,0,0.30858665704727173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,128,1,float16,fp8,0,0.30716800689697266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,128,1,fp8,fp8,0,0.28065599997838336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,0,1,float16,fp8,0,0.30795733133951825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,0,1,fp8,fp8,0,0.28037333488464355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,128,1,float16,float16,0,0.3079199989636739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,0,1,float16,float16,0,0.30750399827957153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,128,1,float16,fp8,0,0.3076159954071045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,128,1,fp8,fp8,0,0.281546672185262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,0,1,float16,fp8,0,0.30745599667231244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,128,1,float16,float16,0,0.16922666629155478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,0,1,float16,float16,0,0.17051732540130615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,0,1,fp8,fp8,0,0.2802773316701253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,128,1,float16,fp8,0,0.1687999963760376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,128,1,fp8,fp8,0,0.16209066907564798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,0,1,float16,fp8,0,0.16780267159144083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,0,1,fp8,fp8,0,0.16214932998021445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,128,1,float16,float16,0,0.16167466839154562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,0,1,float16,float16,0,0.16313599546750387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,128,1,float16,fp8,0,0.16214932998021445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,128,1,fp8,fp8,0,0.14617600043614706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,0,1,float16,fp8,0,0.16196266810099283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,0,1,fp8,fp8,0,0.14808533589045206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,128,1,float16,float16,0,0.162773331006368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,0,1,float16,float16,0,0.1627679963906606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,128,1,float16,fp8,0,0.16169599692026773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,128,1,fp8,fp8,0,0.14833066860834757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,0,1,float16,fp8,0,0.16290666659673056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,0,1,fp8,fp8,0,0.14818666378657022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,128,1,float16,float16,0,0.16159466902414957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,0,1,float16,float16,0,0.16307199994723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,128,1,float16,fp8,0,0.16145599881807962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,128,1,fp8,fp8,0,0.14896532893180847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,0,1,float16,fp8,0,0.16262400150299072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,128,1,float16,float16,0,0.09110400080680847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,0,1,fp8,fp8,0,0.14798399806022644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,0,1,float16,float16,0,0.09109866619110107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,128,1,float16,fp8,0,0.0909493366877238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,0,1,float16,fp8,0,0.09058133761088054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,128,1,fp8,fp8,0,0.08698667089144389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,0,1,fp8,fp8,0,0.08504533767700195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,128,1,float16,float16,0,0.08936533331871033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,0,1,float16,float16,0,0.08905599514643352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,128,1,float16,fp8,0,0.08912533521652222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,128,1,fp8,fp8,0,0.08137066662311554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,0,1,float16,fp8,0,0.08878933389981587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,0,1,fp8,fp8,0,0.0828906645377477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,128,1,float16,float16,0,0.08898133039474487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,0,1,float16,float16,0,0.0883840024471283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,128,1,float16,fp8,0,0.08974400162696838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,128,1,fp8,fp8,0,0.08290666838486989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,0,1,float16,fp8,0,0.08867733677228291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,0,1,fp8,fp8,0,0.08275199929873149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,128,1,float16,float16,0,0.08896000186602275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,0,1,float16,float16,0,0.08890133102734883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,128,1,float16,fp8,0,0.0890933374563853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,128,1,fp8,fp8,0,0.0827893316745758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,0,1,float16,fp8,0,0.08884800473848979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,128,1,float16,float16,0,0.052341332038243614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,0,1,fp8,fp8,0,0.08281066517035167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,0,1,float16,float16,0,0.0539680023988088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,128,1,float16,fp8,0,0.05319466690222422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,128,1,fp8,fp8,0,0.051829333106676735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,0,1,float16,fp8,0,0.05421333511670431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,0,1,fp8,fp8,0,0.05030400057633718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,128,1,float16,float16,0,0.05299200117588043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,0,1,float16,float16,0,0.05186133086681366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,128,1,float16,fp8,0,0.05222400029500326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,128,1,fp8,fp8,0,0.049914668003718056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,0,1,float16,fp8,0,0.051872000098228455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,0,1,fp8,fp8,0,0.050010666251182556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,128,1,float16,float16,0,0.052095999320348106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,0,1,float16,float16,0,0.051962668697039284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,128,1,float16,fp8,0,0.052042668064435325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,128,1,fp8,fp8,0,0.049957334995269775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,0,1,float16,fp8,0,0.052005335688591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,0,1,fp8,fp8,0,0.048351998130480446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,128,1,float16,float16,0,0.052069331208864846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,0,1,float16,float16,0,0.051829333106676735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,128,1,float16,fp8,0,0.05180799961090088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,128,1,fp8,fp8,0,0.04902400076389313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,0,1,float16,fp8,0,0.05209066470464071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,0,1,fp8,fp8,0,0.04996799925963084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,128,1,float16,float16,0,0.03347733368476232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,0,1,float16,float16,0,0.03358400116364161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,128,1,float16,fp8,0,0.03551999976237615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,128,1,fp8,fp8,0,0.03356799980004629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,0,1,float16,fp8,0,0.03369066615899404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,0,1,fp8,fp8,0,0.03367999941110611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,128,1,float16,float16,0,0.035429333647092186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,0,1,float16,float16,0,0.033301333586374916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,128,1,float16,fp8,0,0.03534399966398875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,128,1,fp8,fp8,0,0.03355200091997782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,0,1,float16,fp8,0,0.03333866596221924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,0,1,fp8,fp8,0,0.031770666440327965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,128,1,float16,float16,0,0.03408533334732056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,0,1,float16,float16,0,0.033861334125200905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,128,1,float16,fp8,0,0.03363200028737386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,128,1,fp8,fp8,0,0.03161599983771642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,0,1,float16,fp8,0,0.03448000053564707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,0,1,fp8,fp8,0,0.031557333966096245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,128,1,float16,float16,0,0.033770665526390076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,0,1,float16,float16,0,0.03397866586844126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,128,1,float16,fp8,0,0.03350399931271871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,128,1,fp8,fp8,0,0.03324799984693527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,0,1,float16,fp8,0,0.03533333291610082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,0,1,fp8,fp8,0,0.03342399994532267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,128,1,float16,float16,0,0.025066666305065155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,0,1,float16,float16,0,0.023226665953795116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,128,1,fp8,fp8,0,0.023168000082174938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,0,1,fp8,fp8,0,0.02345066765944163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,128,1,float16,float16,0,0.023391999304294586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,0,1,float16,float16,0,0.023423999547958374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,128,1,float16,fp8,0,0.023472001155217487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,128,1,fp8,fp8,0,0.022304000953833263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,0,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,0,1,fp8,fp8,0,0.02294933299223582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,128,1,float16,float16,0,0.023221333821614582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,0,1,float16,float16,0,0.023103999594847362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,128,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,128,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,0,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,0,1,fp8,fp8,0,0.0235359991590182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,128,1,float16,float16,0,0.02351466566324234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,0,1,float16,float16,0,0.023018665611743927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,128,1,float16,fp8,0,0.023621333142121632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,128,1,fp8,fp8,0,0.022890667120615642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,0,1,float16,fp8,0,0.02330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,0,1,fp8,fp8,0,0.022895999252796173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,128,1,float16,float16,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,0,1,float16,float16,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,128,1,float16,fp8,0,0.019445333629846573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,128,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,0,1,float16,fp8,0,0.020938667158285778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,0,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,128,1,float16,float16,0,0.020799999435742695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,128,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,128,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,0,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,128,1,float16,float16,0,0.020608000457286835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,0,1,float16,float16,0,0.020015999674797058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,128,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,128,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,0,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,0,1,fp8,fp8,0,0.019386666516462963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,128,1,float16,float16,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,0,1,float16,float16,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,128,1,float16,fp8,0,0.019493332753578823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,128,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,0,1,float16,fp8,0,0.01951466624935468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,128,1,float16,float16,0,0.015493333339691162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,0,1,fp8,fp8,0,0.019893333315849304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,128,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,128,1,fp8,fp8,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,128,1,float16,float16,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,128,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,128,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,0,1,fp8,fp8,0,0.01759999990463257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,128,1,fp8,fp8,0,0.016751999656359356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,128,1,float16,fp8,0,0.017594666530688603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,128,1,float16,float16,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,128,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,128,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,0,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,128,1,float16,float16,0,0.5178560018539429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,128,1,float16,fp8,0,0.5190773407618204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,0,1,float16,float16,0,0.519536018371582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,128,1,fp8,fp8,0,0.4655199845631917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,0,1,float16,fp8,0,0.5174026489257812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,0,1,fp8,fp8,0,0.4657333294550578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,128,1,float16,float16,0,0.5174506505330404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,0,1,float16,float16,0,0.517413338025411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,128,1,float16,fp8,0,0.5172266562779745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,128,1,fp8,fp8,0,0.46828265984853107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,0,1,float16,fp8,0,0.5166666507720947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,0,1,fp8,fp8,0,0.4668480157852173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,128,1,float16,float16,0,0.5181546608606974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,0,1,float16,float16,0,0.5176426569620768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,128,1,float16,fp8,0,0.516485333442688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,128,1,fp8,fp8,0,0.46860798199971515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,0,1,float16,fp8,0,0.5173759857813517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,0,1,fp8,fp8,0,0.46900800863901776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,0,1,float16,float16,0,0.27323200305302936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,128,1,float16,float16,0,0.2746880054473877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,128,1,float16,fp8,0,0.27135467529296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,128,1,fp8,fp8,0,0.2550986607869466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,0,1,float16,fp8,0,0.2731893261273702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,0,1,fp8,fp8,0,0.2557973265647888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,128,1,float16,float16,0,0.2669600049654643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,0,1,float16,float16,0,0.2671733299891154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,128,1,float16,fp8,0,0.2670346697171529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,128,1,fp8,fp8,0,0.24099200963974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,0,1,float16,fp8,0,0.26597867409388226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,0,1,fp8,fp8,0,0.24084800481796265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,128,1,float16,float16,0,0.2672320008277893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,0,1,float16,float16,0,0.26546667019526166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,128,1,float16,fp8,0,0.26554665962855023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,128,1,fp8,fp8,0,0.2409706711769104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,0,1,float16,fp8,0,0.2669386665026347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,0,1,fp8,fp8,0,0.24038400252660116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,128,1,float16,float16,0,0.26604266961415607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,0,1,float16,float16,0,0.2670240004857381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,128,1,float16,fp8,0,0.26588799556096393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,128,1,fp8,fp8,0,0.24155733982721964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,0,1,float16,fp8,0,0.2656853397687276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,0,1,fp8,fp8,0,0.2405973275502523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,128,1,float16,float16,0,0.141567995150884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,0,1,float16,float16,0,0.14232533176740012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,128,1,float16,fp8,0,0.14216533303260803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,128,1,fp8,fp8,0,0.13265599807103476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,0,1,float16,fp8,0,0.1414293348789215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,128,1,float16,float16,0,0.14006400108337402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,0,1,fp8,fp8,0,0.13222933808962503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,0,1,float16,float16,0,0.14014933506647745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,128,1,float16,fp8,0,0.1402773360411326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,128,1,fp8,fp8,0,0.12809066971143088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,0,1,float16,fp8,0,0.13992533087730408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,0,1,fp8,fp8,0,0.12982933719952902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,128,1,float16,float16,0,0.14050133029619852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,0,1,float16,float16,0,0.13954666256904602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,128,1,fp8,fp8,0,0.1280693312486013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,128,1,float16,fp8,0,0.13954133788744608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,0,1,float16,fp8,0,0.14020267128944397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,0,1,fp8,fp8,0,0.12807466586430868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,128,1,float16,float16,0,0.14007467031478882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,128,1,float16,fp8,0,0.14060800274213156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,0,1,float16,float16,0,0.1386666695276896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,128,1,fp8,fp8,0,0.13006400068600973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,128,1,float16,float16,0,0.07871466875076294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,0,1,float16,fp8,0,0.1402079959710439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,0,1,fp8,fp8,0,0.1300266683101654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,0,1,float16,float16,0,0.0791786660750707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,128,1,float16,fp8,0,0.07868800063927968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,128,1,fp8,fp8,0,0.07435733576615651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,0,1,float16,fp8,0,0.07877333462238312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,0,1,fp8,fp8,0,0.07456533114115398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,128,1,float16,float16,0,0.07806399961312611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,0,1,float16,float16,0,0.0787360022465388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,128,1,float16,fp8,0,0.07853333155314128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,128,1,fp8,fp8,0,0.07284800211588542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,0,1,float16,fp8,0,0.07708266874154408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,0,1,fp8,fp8,0,0.0720960001150767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,128,1,float16,float16,0,0.07877333462238312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,0,1,float16,float16,0,0.07656533519426982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,128,1,float16,fp8,0,0.07673066854476929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,128,1,fp8,fp8,0,0.07201600074768066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,0,1,float16,fp8,0,0.07678933441638947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,0,1,fp8,fp8,0,0.07258666555086772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,128,1,float16,float16,0,0.07673066854476929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,0,1,float16,float16,0,0.07675200204054515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,128,1,float16,fp8,0,0.07665599882602692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,128,1,fp8,fp8,0,0.07249066730340321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,128,1,float16,float16,0,0.04764266808827718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,0,1,fp8,fp8,0,0.07238399982452393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,0,1,float16,fp8,0,0.07831466694672902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,0,1,float16,float16,0,0.0476746658484141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,128,1,float16,fp8,0,0.047781333327293396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,128,1,fp8,fp8,0,0.04611733555793762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,0,1,float16,fp8,0,0.047968000173568726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,0,1,fp8,fp8,0,0.04402133325735728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,128,1,float16,float16,0,0.045909335215886436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,0,1,float16,float16,0,0.04794133206208547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,128,1,float16,fp8,0,0.04730666677157084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,128,1,fp8,fp8,0,0.04385066529115041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,0,1,float16,fp8,0,0.048058668772379555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,0,1,fp8,fp8,0,0.04390933116277059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,128,1,float16,float16,0,0.04810666541258494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,0,1,float16,float16,0,0.04598399996757507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,128,1,float16,fp8,0,0.047685335079828896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,128,1,fp8,fp8,0,0.045279999574025474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,0,1,float16,fp8,0,0.04613866905371348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,0,1,fp8,fp8,0,0.043893332282702126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,128,1,float16,float16,0,0.047983999053637184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,0,1,float16,float16,0,0.046207999189694725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,128,1,float16,fp8,0,0.04780800143877665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,128,1,fp8,fp8,0,0.045663997530937195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,0,1,float16,fp8,0,0.046240001916885376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,0,1,fp8,fp8,0,0.04387199878692627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,128,1,float16,float16,0,0.0315733328461647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,0,1,float16,float16,0,0.03178133318821589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,128,1,float16,fp8,0,0.0312266672650973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,128,1,fp8,fp8,0,0.02961066613594691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,0,1,float16,fp8,0,0.03197333216667175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,0,1,fp8,fp8,0,0.0295413335164388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,128,1,float16,float16,0,0.029477333029111225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,0,1,float16,float16,0,0.0295413335164388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,128,1,float16,fp8,0,0.030282666285832722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,128,1,fp8,fp8,0,0.029477333029111225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,0,1,float16,fp8,0,0.030447999636332195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,0,1,fp8,fp8,0,0.029343999922275543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,128,1,float16,float16,0,0.031557333966096245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,0,1,float16,float16,0,0.03156266609827677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,128,1,float16,fp8,0,0.0312266672650973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,128,1,fp8,fp8,0,0.029557332396507263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,0,1,float16,fp8,0,0.031925333042939506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,0,1,fp8,fp8,0,0.02941333254178365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,128,1,float16,float16,0,0.03160000095764796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,0,1,float16,float16,0,0.03145066648721695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,128,1,float16,fp8,0,0.03129599988460541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,128,1,fp8,fp8,0,0.029535998900731403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,0,1,float16,fp8,0,0.03154666721820831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,0,1,fp8,fp8,0,0.02920000006755193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,128,1,float16,float16,0,0.022661333282788593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,0,1,float16,float16,0,0.021727999051411945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,128,1,float16,fp8,0,0.02203733225663503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,128,1,fp8,fp8,0,0.021407999098300934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,0,1,float16,fp8,0,0.02254933367172877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,0,1,fp8,fp8,0,0.02327999969323476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,128,1,float16,float16,0,0.023237332701683044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,0,1,float16,float16,0,0.02147199958562851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,128,1,fp8,fp8,0,0.023306667804718018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,0,1,float16,fp8,0,0.022096000611782074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,0,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,0,1,float16,float16,0,0.0230880007147789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,128,1,float16,float16,0,0.022015998760859173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,128,1,float16,fp8,0,0.022053333620230358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,128,1,fp8,fp8,0,0.023120000958442688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,0,1,float16,fp8,0,0.023247999449570973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,128,1,float16,float16,0,0.0232640008131663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,0,1,float16,float16,0,0.021520001192887623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,128,1,float16,fp8,0,0.02347733328739802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,128,1,fp8,fp8,0,0.02294933299223582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,0,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,0,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,128,1,float16,float16,0,0.01929066702723503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,0,1,float16,float16,0,0.01937066639463107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,128,1,float16,fp8,0,0.019413333386182785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,128,1,fp8,fp8,0,0.0186666672428449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,0,1,float16,fp8,0,0.01977066695690155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,128,1,float16,float16,0,0.018901333212852478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,0,1,float16,float16,0,0.019098666807015736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,128,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,128,1,float16,fp8,0,0.019813333948453266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,0,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,0,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,0,1,float16,float16,0,0.018954666952292126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,128,1,float16,float16,0,0.01998399943113327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,128,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,128,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,0,1,float16,fp8,0,0.02179199953873952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,0,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,128,1,float16,float16,0,0.019066666563351948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,128,1,float16,fp8,0,0.020645332833131153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,0,1,float16,float16,0,0.019546666493018467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,128,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,0,1,float16,fp8,0,0.01947733387351036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,128,1,fp8,fp8,0,0.01534933348496755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,128,1,float16,float16,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,0,1,float16,float16,0,0.016250666230916977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,128,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,128,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,0,0.43697067101796466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,0,0.4373439947764079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,0,0.437061349550883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,64,128,1,fp8,fp8,0,0.3982773224512736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,0,0.4375893274943034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,64,0,1,fp8,fp8,0,0.39849066734313965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,0,0.4357920090357463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,0,0.43731733163197833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,0,0.43780267238616943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,64,128,1,fp8,fp8,0,0.3985813458760579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,0,0.43729066848754883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,64,0,1,fp8,fp8,0,0.398144006729126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,0,0.4373279809951782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,0,0.43725331624348956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,0,0.43758400281270343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,64,128,1,fp8,fp8,0,0.39823468526204425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,0,0.43694400787353516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,64,0,1,fp8,fp8,0,0.3981013298034668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,64,128,1,float16,float16,0,0.22611733277638754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,64,0,1,float16,float16,0,0.22451732556025186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,64,128,1,float16,fp8,0,0.22438400983810425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,64,128,1,fp8,fp8,0,0.20677866538365683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,64,0,1,float16,fp8,0,0.22426666816075644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,64,0,1,fp8,fp8,0,0.20602132876714072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,0,0.2239840030670166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,0,0.22427199284235635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,0,0.22663466135660806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,64,128,1,fp8,fp8,0,0.20599466562271118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,0,0.22638400395711264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,64,0,1,fp8,fp8,0,0.20566399892171225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,0,0.22517865896224976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,0,0.22419732809066772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,0,0.22615466515223184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,64,128,1,fp8,fp8,0,0.20577067136764526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,0,0.22484266757965088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,64,0,1,fp8,fp8,0,0.20536533991495767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,0,0.2262293299039205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,0,0.22447466850280762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,64,128,1,fp8,fp8,0,0.20570667584737143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,0,0.22466667493184408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,0,0.2261013388633728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,64,0,1,fp8,fp8,0,0.2057173252105713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,64,128,1,float16,float16,0,0.12152533729871114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,64,0,1,float16,float16,0,0.1197813351949056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,64,128,1,float16,fp8,0,0.12063999970753987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,64,128,1,fp8,fp8,0,0.11127466956774394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,64,0,1,float16,fp8,0,0.12149332960446675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,64,0,1,fp8,fp8,0,0.11148266990979512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,0,0.11940266688664754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,0,0.1200213332970937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,0,0.12172800302505493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,64,128,1,fp8,fp8,0,0.11134933431943257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,0,0.12148800492286682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,64,0,1,fp8,fp8,0,0.11155200004577637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,0,0.12010133266448975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,0,0.12152000268300374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,0,0.12155733505884807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,64,128,1,fp8,fp8,0,0.11166933178901672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,0,0.11974933743476868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,64,0,1,fp8,fp8,0,0.11144000291824341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,0,0.12105066577593486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,0,0.1199733316898346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,0,0.11986666917800903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,64,128,1,fp8,fp8,0,0.11141332983970642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,0,0.11990933616956075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,64,0,1,fp8,fp8,0,0.11129599809646606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,64,128,1,float16,float16,0,0.06830400228500366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,64,0,1,float16,float16,0,0.06844800213972728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,64,128,1,float16,fp8,0,0.06841599941253662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,64,128,1,fp8,fp8,0,0.06256533165772755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,64,0,1,float16,fp8,0,0.06834666430950165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,64,0,1,fp8,fp8,0,0.06427733103434245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,0,0.0680159976085027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,0,0.06851199766000111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,0,0.06865066786607106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,64,128,1,fp8,fp8,0,0.062319998939832054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,0,0.06852266689141591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,64,0,1,fp8,fp8,0,0.06418133278687795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,0,0.06824000179767609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,0,0.06817600131034851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,0,0.0690880020459493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,64,128,1,fp8,fp8,0,0.06411733229955037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,0,0.06832533578077953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,64,0,1,fp8,fp8,0,0.06366933385531108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,0,0.06839466591676076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,0,0.06876799960931142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,0,0.06862933437029521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,64,128,1,fp8,fp8,0,0.06215466558933258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,0,0.06849599877993266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,64,0,1,fp8,fp8,0,0.06334400177001953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,64,128,1,float16,float16,0,0.0415786678592364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,64,0,1,float16,float16,0,0.04188266893227895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,64,128,1,float16,fp8,0,0.041749333341916404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,64,128,1,fp8,fp8,0,0.03953066716591517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,64,0,1,float16,fp8,0,0.04172799984614054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,64,0,1,fp8,fp8,0,0.03980266551176707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,0,0.04180799921353658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,0,0.04180799921353658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,0,0.04362666606903076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,64,128,1,fp8,fp8,0,0.03979733337958654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,0,0.04161066561937332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,64,0,1,fp8,fp8,0,0.03951466580231985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,0,0.04151466737190882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,0,0.04208533465862274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,0,0.0421066681543986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,64,128,1,fp8,fp8,0,0.03937600056330363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,0,0.043749332427978516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,64,0,1,fp8,fp8,0,0.0397119993964831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,0,0.04154666761557261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,0,0.04171200096607208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,0,0.04372266431649526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,64,128,1,fp8,fp8,0,0.0397173340121905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,0,0.043621331453323364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,64,128,1,float16,float16,0,0.029274667302767437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,64,0,1,fp8,fp8,0,0.04012800008058548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,64,0,1,float16,float16,0,0.028138667345046997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,64,128,1,float16,fp8,0,0.02923733244339625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,64,128,1,fp8,fp8,0,0.028746667007605236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,64,0,1,float16,fp8,0,0.02921066681543986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,64,0,1,fp8,fp8,0,0.027466667195161183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,0,0.029071999092896778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,0,0.027690666417280834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,0,0.02958933264017105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,64,128,1,fp8,fp8,0,0.027221334477265675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,0,0.028543998797734577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,64,0,1,fp8,fp8,0,0.02718399961789449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,0,0.029370665550231934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,0,0.029605334003766377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,0,0.029285334050655365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,64,128,1,fp8,fp8,0,0.028607999285062153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,0,0.02942399928967158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,64,0,1,fp8,fp8,0,0.027445333699385326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,0,0.028570666909217834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,0,0.027989332874615986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,0,0.029333333174387615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,64,128,1,fp8,fp8,0,0.02736533433198929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,0,0.029130667448043823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,64,0,1,fp8,fp8,0,0.02749866743882497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,64,128,1,float16,float16,0,0.02109333376089732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,64,0,1,float16,float16,0,0.021370666722456615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,64,128,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,64,128,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,64,0,1,float16,fp8,0,0.023013333479563396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,64,0,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,0,0.021573332448800404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,0,0.021290667355060577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,0,0.022954667607943218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,64,128,1,fp8,fp8,0,0.021888000269730885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,64,0,1,fp8,fp8,0,0.021274665991465252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,0,0.021557333568731945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,64,128,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,0,0.02195200075705846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,0,0.022597332795461018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,64,0,1,fp8,fp8,0,0.021418665846188862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,0,0.021770666042963665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,64,128,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,0,0.02186133215824763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,64,0,1,fp8,fp8,0,0.02161066730817159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,64,128,1,float16,float16,0,0.018986667195955913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,64,0,1,float16,float16,0,0.018810667097568512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,64,128,1,float16,fp8,0,0.01961600035429001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,64,128,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,64,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,0,0.019434666881958645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,64,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,64,0,1,fp8,fp8,0,0.018789333601792652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,0,0.018725333114465077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,0,0.01926933353145917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,0,0.020330666253964107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,64,128,1,fp8,fp8,0,0.018266666680574417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,64,0,1,fp8,fp8,0,0.017423999806245167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,0,0.019093333433071773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,64,128,1,fp8,fp8,0,0.01871466636657715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,64,0,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,64,128,1,float16,float16,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,64,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,64,0,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,0,0.01552533358335495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,64,128,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,0,0.015487999965747198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,64,128,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,64,128,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,64,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,128,1,float16,float16,0,3.449946721394857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,128,1,float16,fp8,0,3.477696100870768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,128,1,fp8,fp8,0,3.228405316670736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,128,1,float16,float16,0,3.4863414764404297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,0,1,float16,float16,0,22.819076538085938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,0,1,fp8,fp8,0,19.522064208984375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,0,1,float16,fp8,0,21.963414510091145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,128,1,float16,fp8,0,3.5157388051350913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,128,1,fp8,fp8,0,3.273248036702474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,128,1,float16,float16,0,3.507648150126139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,0,1,float16,float16,0,25.30236307779948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,128,1,float16,fp8,0,3.5356534322102866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,128,1,fp8,fp8,0,3.298960049947103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,0,1,fp8,fp8,0,19.5656000773112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,0,1,float16,fp8,0,21.91973876953125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,128,1,float16,float16,0,3.5402399698893228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,0,1,float16,float16,0,24.91009012858073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,128,1,float16,fp8,0,3.5671841303507485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,128,1,fp8,fp8,0,3.3351306915283203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,0,1,fp8,fp8,0,19.58843739827474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,0,1,float16,fp8,0,25.862325032552082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,128,1,float16,float16,0,2.0480213165283203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,0,1,float16,float16,0,22.84704081217448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,128,1,float16,fp8,0,2.102229277292887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,128,1,fp8,fp8,0,1.9881866772969563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,0,1,float16,float16,0,12.046452840169271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,0,1,fp8,fp8,0,19.628772735595703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,128,1,float16,float16,0,1.8056000073750813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,0,1,float16,fp8,0,25.16448465983073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,128,1,float16,fp8,0,1.818933327992757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,128,1,fp8,fp8,0,1.6912479400634766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,0,1,float16,fp8,0,11.13916269938151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,0,1,fp8,fp8,0,10.191109339396158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,128,1,float16,float16,0,1.808575948079427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,0,1,float16,float16,0,10.918960571289062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,128,1,float16,fp8,0,1.8234880765279133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,0,1,fp8,fp8,0,9.88643709818522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,128,1,fp8,fp8,0,1.698464075724284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,0,1,float16,fp8,0,11.133482615152994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,128,1,float16,float16,0,1.8149867057800293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,0,1,float16,float16,0,11.611498514811197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,128,1,float16,fp8,0,1.8324693044026692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,128,1,fp8,fp8,0,1.7075947125752766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,0,1,fp8,fp8,0,9.891578674316406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,0,1,float16,fp8,0,10.767098744710287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,128,1,float16,float16,0,1.8294506072998047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,0,1,float16,float16,0,11.94168472290039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,0,1,fp8,fp8,0,9.901018778483072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,0,1,float16,fp8,0,11.071338653564453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,128,1,float16,fp8,0,1.8486827214558919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,128,1,fp8,fp8,0,1.7250453631083171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,128,1,float16,float16,0,1.1225600242614746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,0,1,float16,float16,0,12.392906188964844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,128,1,float16,fp8,0,1.148960034052531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,0,1,float16,float16,0,5.616074879964192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,128,1,fp8,fp8,0,1.0995253721872966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,0,1,fp8,fp8,0,9.918863932291666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,0,1,float16,fp8,0,13.217301686604818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,128,1,float16,float16,0,1.0073280334472656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,128,1,float16,fp8,0,1.01419202486674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,0,1,float16,fp8,0,5.648143768310547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,0,1,fp8,fp8,0,5.24830945332845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,128,1,fp8,fp8,0,0.9524426460266113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,128,1,float16,float16,0,1.011525313059489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,0,1,float16,float16,0,5.900522867838542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,0,1,fp8,fp8,0,5.096213340759277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,128,1,float16,fp8,0,1.0180959701538086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,0,1,float16,fp8,0,5.470479965209961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,128,1,fp8,fp8,0,0.9548106988271078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,0,1,float16,float16,0,5.4895680745442705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,128,1,float16,float16,0,1.012384017308553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,128,1,float16,fp8,0,1.0202773412068684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,128,1,fp8,fp8,0,0.9595519701639811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,0,1,fp8,fp8,0,5.101888020833333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,0,1,float16,fp8,0,5.742912292480469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,128,1,float16,float16,0,1.07805331548055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,0,1,float16,float16,0,5.476853052775065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,128,1,float16,fp8,0,1.0304640134175618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,128,1,fp8,fp8,0,0.9680000146230062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,0,1,fp8,fp8,0,5.106778780619304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,0,1,float16,fp8,0,5.667472203572591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,128,1,float16,float16,0,0.8093600273132324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,0,1,float16,float16,0,5.656848271687825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,128,1,float16,fp8,0,0.7879520257314047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,0,1,float16,float16,0,3.086186726888021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,128,1,fp8,fp8,0,0.7447146574656168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,0,1,fp8,fp8,0,5.116858800252278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,0,1,float16,fp8,0,5.505231857299805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,128,1,float16,float16,0,0.7819466590881348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,128,1,float16,fp8,0,0.7827413082122803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,0,1,fp8,fp8,0,2.8686132431030273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,0,1,float16,fp8,0,3.083360036214193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,128,1,fp8,fp8,0,0.7411999702453613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,0,1,float16,float16,0,3.064469337463379
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,128,1,float16,float16,0,0.780735969543457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,128,1,float16,fp8,0,0.7810986836751302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,0,1,fp8,fp8,0,2.8655306498209634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,0,1,float16,fp8,0,3.065786679585775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,128,1,fp8,fp8,0,0.740821361541748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,0,1,float16,float16,0,3.0646079381306968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,128,1,float16,float16,0,0.779695987701416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,128,1,float16,fp8,0,0.7806986967722574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,0,1,fp8,fp8,0,2.86625067392985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,0,1,float16,fp8,0,3.072789192199707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,128,1,fp8,fp8,0,0.7403199672698975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,0,1,float16,float16,0,3.065749486287435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,128,1,float16,float16,0,0.7814079920450846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,128,1,float16,fp8,0,0.7798666954040527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,0,1,fp8,fp8,0,2.8638346989949546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,0,1,float16,fp8,0,3.066080093383789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,128,1,fp8,fp8,0,0.7399786313374838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,0,1,float16,float16,0,3.0813172658284507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,0,1,fp8,fp8,0,2.866367975870768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,0,1,float16,fp8,0,3.0732692082722983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,128,1,float16,float16,0,2.574298699696859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,128,1,fp8,fp8,0,2.397792021433512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,128,1,float16,fp8,0,2.5949920018514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,128,1,float16,float16,0,2.5838774045308432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,0,1,float16,float16,0,13.294170379638672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,0,1,float16,fp8,0,12.394864400227865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,0,1,fp8,fp8,0,11.496693929036459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,128,1,float16,fp8,0,2.6033973693847656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,128,1,fp8,fp8,0,2.4212586085001626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,128,1,float16,float16,0,2.5956692695617676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,0,1,float16,float16,0,13.2172482808431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,128,1,float16,fp8,0,2.618746598561605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,128,1,fp8,fp8,0,2.438490708669027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,0,1,fp8,fp8,0,11.520927429199219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,0,1,float16,fp8,0,13.516112009684244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,128,1,float16,float16,0,2.6165812810262046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,0,1,float16,float16,0,12.383711496988932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,128,1,float16,fp8,0,2.640218734741211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,128,1,fp8,fp8,0,2.4678452809651694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,0,1,fp8,fp8,0,11.538064320882162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,0,1,float16,fp8,0,13.506037394205729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,128,1,float16,float16,0,1.5353546142578125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,0,1,float16,float16,0,13.719156901041666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,128,1,float16,fp8,0,1.57094939549764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,128,1,fp8,fp8,0,1.491418679555257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,0,1,float16,float16,0,7.854250590006511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,0,1,fp8,fp8,0,11.566885630289713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,0,1,float16,fp8,0,12.473492940266928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,128,1,float16,float16,0,1.3553813298543294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,128,1,float16,fp8,0,1.3655680020650227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,128,1,fp8,fp8,0,1.2714293003082275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,0,1,fp8,fp8,0,6.082522710164388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,0,1,float16,fp8,0,7.163056055704753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,0,1,float16,float16,0,6.282010396321614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,128,1,float16,float16,0,1.3584319750467937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,0,1,float16,fp8,0,6.406426747639974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,0,1,fp8,fp8,0,5.859605153401692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,128,1,float16,fp8,0,1.3795733451843262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,128,1,fp8,fp8,0,1.2758666674296062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,0,1,float16,float16,0,6.326666514078776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,128,1,float16,float16,0,1.3627999623616536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,128,1,float16,fp8,0,1.3766613006591797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,128,1,fp8,fp8,0,1.2842613061269124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,0,1,fp8,fp8,0,5.863701502482097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,0,1,float16,fp8,0,6.313530604044597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,128,1,float16,float16,0,1.4044267336527507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,0,1,float16,float16,0,6.298789342244466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,128,1,float16,fp8,0,1.3891520500183105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,0,1,fp8,fp8,0,5.872213363647461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,128,1,fp8,fp8,0,1.2985226313273113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,0,1,float16,fp8,0,6.460069020589192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,128,1,float16,float16,0,0.8460373083750407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,0,1,float16,float16,0,6.557034810384114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,128,1,float16,fp8,0,0.8654987017313639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,128,1,fp8,fp8,0,0.8300213019053141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,0,1,float16,float16,0,3.541130701700846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,0,1,fp8,fp8,0,5.888042449951172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,0,1,float16,fp8,0,6.726746877034505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,128,1,float16,float16,0,0.7616213162740072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,128,1,float16,fp8,0,0.7667733033498129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,0,1,fp8,fp8,0,3.161418596903483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,0,1,float16,fp8,0,3.398245175679525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,128,1,fp8,fp8,0,0.7203306357065836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,128,1,float16,float16,0,0.7633492946624756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,0,1,float16,float16,0,3.493589401245117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,128,1,float16,fp8,0,0.7694293657938639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,0,1,fp8,fp8,0,3.049626668294271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,0,1,float16,fp8,0,3.316197395324707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,128,1,fp8,fp8,0,0.7238079706827799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,128,1,float16,float16,0,0.7664799690246582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,0,1,float16,float16,0,3.2763039271036782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,128,1,float16,fp8,0,0.7727200190226237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,0,1,fp8,fp8,0,3.0523465474446616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,0,1,float16,fp8,0,3.294560114542643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,128,1,fp8,fp8,0,0.7272373040517172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,0,1,float16,float16,0,3.297856012980143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,128,1,float16,float16,0,0.7710879643758138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,128,1,float16,fp8,0,0.7783253192901611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,0,1,fp8,fp8,0,3.0552587509155273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,0,1,float16,fp8,0,3.2782932917277017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,128,1,fp8,fp8,0,0.7326666514078776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,128,1,float16,float16,0,0.592847983042399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,0,1,float16,float16,0,3.279818534851074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,128,1,float16,fp8,0,0.5948426723480225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,0,1,fp8,fp8,0,3.0622240702311196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,128,1,fp8,fp8,0,0.5663839975992838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,0,1,float16,fp8,0,3.3031574885050454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,0,1,float16,float16,0,1.8987785975138347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,128,1,float16,float16,0,0.5948479970296224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,128,1,float16,fp8,0,0.5948213338851929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,0,1,fp8,fp8,0,1.7671573956807454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,0,1,float16,fp8,0,1.8970613479614258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,0,1,float16,float16,0,1.886965274810791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,128,1,fp8,fp8,0,0.5660586754480997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,128,1,float16,float16,0,0.5946346521377563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,128,1,float16,fp8,0,0.5939679940541586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,0,1,fp8,fp8,0,1.7642347017923992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,0,1,float16,fp8,0,1.8838879267374675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,0,1,float16,float16,0,1.888304074605306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,128,1,fp8,fp8,0,0.5617280006408691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,128,1,float16,float16,0,0.5960853497187296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,0,1,fp8,fp8,0,1.7657599449157715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,0,1,float16,fp8,0,1.8864587148030598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,128,1,float16,fp8,0,0.5942080020904541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,128,1,fp8,fp8,0,0.5656586488087972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,0,1,float16,float16,0,1.8873440424601238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,128,1,float16,float16,0,0.5954773426055908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,0,1,float16,fp8,0,1.888319969177246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,0,1,fp8,fp8,0,1.7635040283203125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,128,1,float16,fp8,0,0.5963093439737955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,0,1,float16,float16,0,1.8906453450520833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,128,1,fp8,fp8,0,0.5635146697362264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,0,1,float16,fp8,0,1.888437271118164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,0,1,fp8,fp8,0,1.767189343770345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,128,1,float16,float16,0,2.1388907432556152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,128,1,fp8,fp8,0,1.9951893488566081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,128,1,float16,fp8,0,2.158426602681478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,128,1,float16,float16,0,2.144170602162679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,0,1,float16,float16,0,9.277104059855143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,0,1,float16,fp8,0,8.90004793802897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,0,1,fp8,fp8,0,8.270986557006836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,128,1,float16,fp8,0,2.1630400021870932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,128,1,fp8,fp8,0,2.0079466501871743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,0,1,float16,float16,0,8.88807487487793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,128,1,float16,float16,0,2.1535092989603677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,128,1,float16,fp8,0,2.1732160250345864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,0,1,fp8,fp8,0,8.28488540649414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,0,1,float16,fp8,0,9.34933853149414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,128,1,fp8,fp8,0,2.021567980448405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,128,1,float16,float16,0,2.1724747021993003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,0,1,float16,float16,0,9.345343907674154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,128,1,float16,fp8,0,2.1933226585388184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,0,1,fp8,fp8,0,8.297322591145834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,0,1,float16,fp8,0,9.119370778401693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,128,1,fp8,fp8,0,2.0461653073628745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,0,1,float16,float16,0,8.944565455118815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,128,1,float16,float16,0,1.2817280292510986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,128,1,float16,fp8,0,1.3103093306223552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,128,1,fp8,fp8,0,1.2459999720255535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,0,1,float16,float16,0,4.724501291910808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,0,1,float16,fp8,0,10.617120107014975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,0,1,fp8,fp8,0,8.321231842041016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,128,1,float16,float16,0,1.1309813658396404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,128,1,float16,fp8,0,1.1413599650065105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,0,1,float16,fp8,0,4.753936131795247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,0,1,fp8,fp8,0,4.4182132085164385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,128,1,fp8,fp8,0,1.063962697982788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,0,1,float16,float16,0,4.661402702331543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,128,1,float16,float16,0,1.134010632832845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,128,1,float16,fp8,0,1.1434133052825928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,0,1,float16,fp8,0,4.653264045715332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,128,1,fp8,fp8,0,1.0669386386871338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,0,1,fp8,fp8,0,4.232362747192383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,128,1,float16,float16,0,1.1397706667582195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,0,1,float16,float16,0,4.735040028889974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,128,1,float16,fp8,0,1.1507093111673992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,0,1,fp8,fp8,0,4.234896024068196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,0,1,float16,fp8,0,4.604629198710124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,128,1,fp8,fp8,0,1.0738933086395264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,128,1,float16,float16,0,1.1485119660695393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,0,1,float16,float16,0,4.551845232645671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,128,1,float16,fp8,0,1.1609973112742107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,128,1,fp8,fp8,0,1.0858559608459473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,0,1,fp8,fp8,0,4.2423200607299805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,0,1,float16,fp8,0,4.693493207295735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,128,1,float16,float16,0,0.7063199679056803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,0,1,float16,float16,0,4.760789235432942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,128,1,float16,fp8,0,0.7533813317616781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,0,1,float16,float16,0,2.5200746854146323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,128,1,fp8,fp8,0,0.69540802637736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,0,1,fp8,fp8,0,4.252837181091309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,0,1,float16,fp8,0,4.579557418823242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,128,1,float16,float16,0,0.6363146702448527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,128,1,float16,fp8,0,0.6408480008443197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,0,1,fp8,fp8,0,2.310896078745524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,0,1,float16,fp8,0,2.6331307093302407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,128,1,fp8,fp8,0,0.6027359962463379
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,0,1,float16,float16,0,2.3719147046407065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,128,1,float16,float16,0,0.6360426743825277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,128,1,float16,fp8,0,0.6419519980748495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,0,1,fp8,fp8,0,2.2184267044067383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,0,1,float16,fp8,0,2.533936023712158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,128,1,fp8,fp8,0,0.6051199833552042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,0,1,float16,float16,0,2.37499205271403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,128,1,float16,float16,0,0.6396373510360718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,128,1,float16,fp8,0,0.6460640033086141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,0,1,float16,fp8,0,2.449626604715983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,0,1,fp8,fp8,0,2.2199573516845703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,128,1,fp8,fp8,0,0.6077226797739664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,0,1,float16,float16,0,2.386960029602051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,128,1,float16,float16,0,0.6438560088475546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,128,1,float16,fp8,0,0.6508320172627767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,0,1,fp8,fp8,0,2.224181334177653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,0,1,float16,fp8,0,2.3939572970072427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,0,1,float16,float16,0,2.3948000272115073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,128,1,fp8,fp8,0,0.6132426659266154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,128,1,float16,float16,0,0.4994773467381795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,128,1,float16,fp8,0,0.49794665972391766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,0,1,float16,float16,0,1.4073386192321777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,0,1,float16,fp8,0,2.3908799489339194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,0,1,fp8,fp8,0,2.2274880409240723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,128,1,fp8,fp8,0,0.47222399711608887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,128,1,float16,float16,0,0.494485338528951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,0,1,float16,fp8,0,1.4095093409220378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,128,1,float16,fp8,0,0.49586133162180585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,0,1,fp8,fp8,0,1.3122080167134602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,0,1,float16,float16,0,1.3987572987874348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,128,1,fp8,fp8,0,0.47138134638468426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,128,1,float16,float16,0,0.4965279897054036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,0,1,fp8,fp8,0,1.3105119864145915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,0,1,float16,fp8,0,1.3982346852620442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,128,1,float16,fp8,0,0.496560017267863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,128,1,fp8,fp8,0,0.4719093243281047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,0,1,float16,float16,0,1.3981013298034668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,128,1,float16,float16,0,0.49739734331766766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,0,1,float16,fp8,0,1.399722735087077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,0,1,fp8,fp8,0,1.3106026649475098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,128,1,float16,fp8,0,0.4977653423945109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,0,1,float16,float16,0,1.3984692891438801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,128,1,fp8,fp8,0,0.4703093369801839
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,128,1,float16,float16,0,0.4944426616032918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,0,1,float16,fp8,0,1.4018239974975586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,0,1,fp8,fp8,0,1.3124746481577556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,128,1,float16,fp8,0,0.4970613320668538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,0,1,float16,float16,0,1.4043679237365723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,128,1,fp8,fp8,0,0.4719093243281047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,0,1,float16,fp8,0,1.403253396352132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,0,1,fp8,fp8,0,1.3111519813537598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,128,1,float16,float16,0,3.351263999938965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,128,1,fp8,fp8,0,3.131333351135254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,128,1,float16,fp8,0,3.3761812845865884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,128,1,float16,float16,0,3.3913866678873696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,0,1,float16,float16,0,11.841178894042969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,0,1,fp8,fp8,0,11.024351755777994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,0,1,float16,fp8,0,12.511669158935547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,128,1,fp8,fp8,0,3.1724640528361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,128,1,float16,fp8,0,3.4173173904418945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,128,1,float16,float16,0,3.4103307723999023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,0,1,float16,float16,0,12.727850596110025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,128,1,float16,fp8,0,3.4367361068725586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,0,1,fp8,fp8,0,11.068181355794271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,0,1,float16,fp8,0,13.588075002034506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,128,1,fp8,fp8,0,3.198490778605143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,0,1,float16,float16,0,12.215775807698568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,128,1,float16,float16,0,3.442938804626465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,128,1,float16,fp8,0,3.470501263936361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,0,1,fp8,fp8,0,11.093818664550781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,128,1,fp8,fp8,0,3.240048090616862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,0,1,float16,fp8,0,12.424090067545572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,128,1,float16,float16,0,1.9564213752746582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,0,1,float16,float16,0,13.527183532714844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,128,1,float16,fp8,0,1.9972000122070312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,0,1,float16,float16,0,7.349679946899414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,128,1,fp8,fp8,0,1.8958560625712078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,0,1,fp8,fp8,0,11.137354532877604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,0,1,float16,fp8,0,12.886634826660156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,128,1,float16,float16,0,1.7092053095499675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,128,1,float16,fp8,0,1.7242026329040527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,0,1,fp8,fp8,0,5.872416178385417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,128,1,fp8,fp8,0,1.5968480110168457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,0,1,float16,fp8,0,7.131754557291667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,0,1,float16,float16,0,6.829749425252278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,128,1,float16,float16,0,1.7148213386535645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,128,1,float16,fp8,0,1.7299680709838867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,0,1,fp8,fp8,0,5.569909413655599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,0,1,float16,fp8,0,6.107429504394531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,128,1,fp8,fp8,0,1.6058826446533203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,128,1,float16,float16,0,1.721552054087321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,0,1,float16,float16,0,6.699487686157227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,128,1,float16,fp8,0,1.7655466397603352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,0,1,fp8,fp8,0,5.57634162902832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,0,1,float16,fp8,0,6.0052744547526045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,128,1,fp8,fp8,0,1.6158347129821777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,0,1,float16,float16,0,6.195077260335286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,128,1,float16,float16,0,1.7349707285563152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,128,1,float16,fp8,0,1.7545812924702961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,0,1,fp8,fp8,0,5.58682123819987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,0,1,float16,fp8,0,6.023647944132487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,128,1,fp8,fp8,0,1.6312534014383953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,128,1,float16,float16,0,1.024725357691447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,0,1,float16,float16,0,6.269946416219075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,128,1,float16,fp8,0,1.106496016184489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,0,1,fp8,fp8,0,5.6055145263671875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,0,1,float16,float16,0,3.3745654424031577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,128,1,fp8,fp8,0,0.9986133575439453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,0,1,float16,fp8,0,6.048869450887044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,128,1,float16,float16,0,0.908128023147583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,128,1,float16,fp8,0,0.9158559640248617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,0,1,fp8,fp8,0,3.0127414067586265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,0,1,float16,fp8,0,3.315039952596029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,128,1,fp8,fp8,0,0.8547146320343018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,0,1,float16,float16,0,3.0734825134277344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,128,1,float16,float16,0,0.9105813503265381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,128,1,float16,fp8,0,0.9173920154571533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,0,1,fp8,fp8,0,2.8632640838623047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,0,1,float16,fp8,0,3.177274703979492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,0,1,float16,float16,0,3.0937067667643228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,128,1,fp8,fp8,0,0.857157309850057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,128,1,float16,float16,0,0.9154880046844482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,128,1,float16,fp8,0,0.9236906369527181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,0,1,fp8,fp8,0,2.8673121134440103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,0,1,float16,fp8,0,3.114560127258301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,0,1,float16,float16,0,3.2431678771972656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,128,1,fp8,fp8,0,0.8640426794687907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,128,1,float16,float16,0,0.9391360282897949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,128,1,float16,fp8,0,0.9300586382548014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,0,1,fp8,fp8,0,2.873594601949056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,0,1,float16,fp8,0,3.2581440607706704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,0,1,float16,float16,0,3.095088005065918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,128,1,fp8,fp8,0,0.8711360295613607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,128,1,float16,float16,0,0.5686026811599731
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,128,1,float16,fp8,0,0.5832586685816447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,0,1,float16,float16,0,1.6880426406860352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,128,1,fp8,fp8,0,0.5600159962972006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,0,1,fp8,fp8,0,2.8813867568969727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,0,1,float16,fp8,0,3.1397441228230796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,128,1,float16,float16,0,0.512826681137085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,0,1,float16,fp8,0,1.7027254104614258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,128,1,float16,fp8,0,0.5160959959030151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,0,1,fp8,fp8,0,1.5916320482889812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,0,1,float16,float16,0,1.65449062983195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,128,1,fp8,fp8,0,0.4950079917907715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,128,1,float16,float16,0,0.5226933161417643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,0,1,float16,fp8,0,1.625439961751302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,0,1,fp8,fp8,0,1.5172373453776042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,128,1,float16,fp8,0,0.5149173339207967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,0,1,float16,float16,0,1.6218506495157878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,128,1,fp8,fp8,0,0.4873066743214925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,128,1,float16,float16,0,0.5146826505661011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,0,1,fp8,fp8,0,1.516485373179118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,0,1,float16,fp8,0,1.6273493766784668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,128,1,float16,fp8,0,0.519322673479716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,128,1,fp8,fp8,0,0.49059200286865234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,0,1,float16,float16,0,1.6551839510599773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,128,1,float16,float16,0,0.5518826643625895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,0,1,float16,fp8,0,1.6308639844258626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,0,1,fp8,fp8,0,1.5201600392659504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,128,1,float16,fp8,0,0.5228533347447714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,128,1,fp8,fp8,0,0.4946560064951579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,0,1,float16,float16,0,1.6321226755777996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,128,1,float16,float16,0,0.40254934628804523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,0,1,float16,fp8,0,1.6381653149922688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,0,1,fp8,fp8,0,1.5250080426534016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,128,1,float16,fp8,0,0.40304001172383624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,0,1,float16,float16,0,0.9899893601735433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,128,1,fp8,fp8,0,0.3832746744155884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,128,1,float16,float16,0,0.40028266112009686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,0,1,float16,fp8,0,0.9916000366210938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,0,1,fp8,fp8,0,0.9220960140228271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,128,1,float16,fp8,0,0.4015146493911743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,128,1,fp8,fp8,0,0.38118934631347656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,0,1,float16,float16,0,0.9799520174662272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,128,1,float16,float16,0,0.40085868040720624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,0,1,fp8,fp8,0,0.9189226627349854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,0,1,float16,fp8,0,0.9835360050201416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,128,1,float16,fp8,0,0.4023626645406087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,128,1,fp8,fp8,0,0.3803253173828125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,0,1,float16,float16,0,0.9800639947255453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,0,1,float16,fp8,0,0.9838879903157552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,128,1,float16,float16,0,0.4008053143819173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,0,1,fp8,fp8,0,0.9204533100128174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,128,1,fp8,fp8,0,0.3818986813227336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,128,1,float16,fp8,0,0.401583989461263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,0,1,float16,float16,0,0.9829386870066324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,128,1,float16,float16,0,0.40218667189280194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,0,1,float16,fp8,0,0.9823413689931234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,0,1,fp8,fp8,0,0.9194400310516357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,128,1,float16,fp8,0,0.4031840165456136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,0,1,float16,float16,0,0.9868106842041016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,128,1,fp8,fp8,0,0.3816853364308675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,0,1,float16,fp8,0,0.9855199654897054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,0,1,fp8,fp8,0,0.9206666946411133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,128,1,float16,float16,0,2.4965813954671225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,128,1,fp8,fp8,0,2.3197760581970215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,128,1,float16,fp8,0,2.516757329305013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,128,1,float16,float16,0,2.5058933893839517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,0,1,float16,float16,0,7.1666717529296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,0,1,fp8,fp8,0,6.659338633219401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,128,1,float16,fp8,0,2.5262133280436196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,0,1,float16,fp8,0,7.285664240519206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,128,1,fp8,fp8,0,2.3450026512145996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,0,1,float16,float16,0,7.403343836466472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,128,1,float16,float16,0,2.5184480349222818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,0,1,fp8,fp8,0,6.683343887329102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,0,1,float16,fp8,0,7.200629552205403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,128,1,float16,fp8,0,2.541306654612223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,128,1,fp8,fp8,0,2.362880071004232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,0,1,float16,float16,0,7.498085021972656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,128,1,float16,float16,0,2.5405920346577964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,0,1,fp8,fp8,0,6.703173319498698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,128,1,float16,fp8,0,2.562922636667887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,0,1,float16,fp8,0,7.544511795043945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,128,1,fp8,fp8,0,2.3898186683654785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,128,1,float16,float16,0,1.4668960571289062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,0,1,float16,float16,0,8.371562957763672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,128,1,float16,fp8,0,1.4978987375895183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,0,1,fp8,fp8,0,6.730282465616862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,0,1,float16,fp8,0,7.675130844116211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,0,1,float16,float16,0,3.856581370035807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,128,1,fp8,fp8,0,1.419007937113444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,128,1,float16,float16,0,1.283135970433553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,128,1,float16,fp8,0,1.2944800059000652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,0,1,float16,fp8,0,4.002794583638509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,0,1,fp8,fp8,0,3.6108853022257485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,0,1,float16,float16,0,3.8789599736531577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,128,1,fp8,fp8,0,1.2005173365275066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,128,1,float16,float16,0,1.2874826590220134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,0,1,fp8,fp8,0,3.3879947662353516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,0,1,float16,fp8,0,3.6504745483398438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,128,1,float16,fp8,0,1.2986133098602295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,0,1,float16,float16,0,3.647738774617513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,128,1,fp8,fp8,0,1.2075200080871582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,128,1,float16,float16,0,1.2941653728485107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,0,1,fp8,fp8,0,3.3933226267496743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,0,1,float16,fp8,0,3.673066775004069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,128,1,float16,fp8,0,1.3048266569773357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,128,1,fp8,fp8,0,1.213594675064087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,0,1,float16,float16,0,3.747450510660807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,128,1,float16,float16,0,1.303978681564331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,0,1,fp8,fp8,0,3.4010772705078125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,0,1,float16,fp8,0,3.7780799865722656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,128,1,float16,fp8,0,1.317893346150716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,128,1,fp8,fp8,0,1.2283519903818767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,0,1,float16,float16,0,3.838143984476725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,128,1,float16,float16,0,0.7743146419525146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,128,1,float16,fp8,0,0.793503999710083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,0,1,float16,float16,0,2.000943978627523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,0,1,fp8,fp8,0,3.415247917175293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,0,1,float16,fp8,0,3.860629399617513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,128,1,fp8,fp8,0,0.7545226414998373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,128,1,float16,float16,0,0.7052906354268392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,0,1,float16,fp8,0,2.0897706349690757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,0,1,fp8,fp8,0,1.8678720792134602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,128,1,float16,fp8,0,0.6904746691385905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,0,1,float16,float16,0,1.8833813667297363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,128,1,fp8,fp8,0,0.645146648089091
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,128,1,float16,float16,0,0.6873013178507487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,0,1,float16,fp8,0,1.8885332743326824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,0,1,fp8,fp8,0,1.7573119799296062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,128,1,float16,fp8,0,0.6943840185801188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,128,1,fp8,fp8,0,0.6493279933929443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,0,1,float16,float16,0,1.886906623840332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,128,1,float16,float16,0,0.6911626656850179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,0,1,float16,fp8,0,1.8915893236796062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,0,1,fp8,fp8,0,1.7606080373128254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,128,1,float16,fp8,0,0.6965866883595785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,128,1,fp8,fp8,0,0.6513226826985677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,0,1,float16,float16,0,1.8920000394185383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,128,1,float16,float16,0,0.6956586837768555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,0,1,fp8,fp8,0,1.7636799812316895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,0,1,float16,fp8,0,1.8984692891438801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,128,1,float16,fp8,0,0.7034293015797933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,128,1,fp8,fp8,0,0.658186674118042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,0,1,float16,float16,0,1.8978400230407715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,128,1,float16,float16,0,0.4328533411026001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,128,1,float16,fp8,0,0.4450240135192871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,0,1,float16,fp8,0,1.9063040415445964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,0,1,float16,float16,0,1.0581386884053547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,0,1,fp8,fp8,0,1.7694932619730632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,128,1,fp8,fp8,0,0.4263999859491984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,128,1,float16,float16,0,0.38838398456573486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,0,1,float16,fp8,0,1.068559964497884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,0,1,fp8,fp8,0,0.9993493556976318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,128,1,float16,fp8,0,0.39228800932566327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,0,1,float16,float16,0,1.0064319769541423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,128,1,fp8,fp8,0,0.37118931611378986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,128,1,float16,float16,0,0.3892693519592285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,0,1,fp8,fp8,0,0.9437867005666097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,0,1,float16,fp8,0,1.0095733006795247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,128,1,float16,fp8,0,0.3940266768137614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,0,1,float16,float16,0,1.0071307023366292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,128,1,fp8,fp8,0,0.371829350789388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,128,1,float16,float16,0,0.39212266604105633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,0,1,float16,fp8,0,1.0106080373128254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,0,1,fp8,fp8,0,0.9452959696451823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,128,1,float16,fp8,0,0.3951359987258911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,0,1,float16,float16,0,1.0108853181203206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,128,1,fp8,fp8,0,0.37455999851226807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,128,1,float16,float16,0,0.3951786756515503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,0,1,float16,fp8,0,1.015653371810913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,0,1,fp8,fp8,0,0.9462506771087646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,128,1,float16,fp8,0,0.3996106783548991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,0,1,float16,float16,0,1.0156746705373128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,128,1,fp8,fp8,0,0.37757333119710285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,128,1,float16,float16,0,0.30908799171447754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,0,1,float16,fp8,0,1.0205600261688232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,0,1,float16,float16,0,0.6401546796162924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,0,1,fp8,fp8,0,0.9509973526000977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,128,1,float16,fp8,0,0.3099626700083415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,128,1,fp8,fp8,0,0.29388266801834106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,128,1,float16,float16,0,0.30723732709884644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,0,1,float16,fp8,0,0.6405760049819946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,0,1,fp8,fp8,0,0.5977813402811686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,128,1,float16,fp8,0,0.30723732709884644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,0,1,float16,float16,0,0.635807991027832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,128,1,fp8,fp8,0,0.2905973394711812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,128,1,float16,float16,0,0.30664533376693726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,0,1,float16,fp8,0,0.6358240048090616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,0,1,fp8,fp8,0,0.5954879919687907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,0,1,float16,float16,0,0.635205348332723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,128,1,float16,fp8,0,0.30525867144266766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,128,1,fp8,fp8,0,0.29172799984614056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,128,1,float16,float16,0,0.30502933263778687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,0,1,float16,fp8,0,0.6344799995422363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,0,1,fp8,fp8,0,0.5962773164113363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,0,1,float16,float16,0,0.6359999974568685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,128,1,float16,fp8,0,0.3063093423843384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,128,1,fp8,fp8,0,0.2916640043258667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,128,1,float16,float16,0,0.30642133951187134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,0,1,float16,fp8,0,0.6375626722971598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,0,1,fp8,fp8,0,0.5938239892323812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,0,1,float16,float16,0,0.6365386644999186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,128,1,float16,fp8,0,0.3060479958852132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,128,1,fp8,fp8,0,0.29206399122873944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,0,1,float16,fp8,0,0.6376479864120483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,0,1,fp8,fp8,0,0.5969706773757935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,128,1,float16,float16,0,3.2941919962565103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,128,1,float16,fp8,0,3.3184213638305664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,128,1,fp8,fp8,0,3.0693705876668296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,0,1,float16,float16,0,7.223728179931641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,128,1,float16,float16,0,3.3340746561686196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,0,1,fp8,fp8,0,6.7135575612386065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,0,1,float16,fp8,0,7.242394765218099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,128,1,float16,fp8,0,3.357701301574707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,128,1,fp8,fp8,0,3.1150134404500327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,0,1,float16,float16,0,7.264272054036458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,128,1,float16,float16,0,3.3513387044270835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,0,1,fp8,fp8,0,6.758890787760417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,0,1,float16,fp8,0,7.285530726114909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,128,1,float16,fp8,0,3.375887870788574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,128,1,fp8,fp8,0,3.138469378153483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,0,1,float16,float16,0,7.292666753133138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,128,1,float16,float16,0,3.3873812357584634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,0,1,float16,fp8,0,7.861653645833333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,0,1,fp8,fp8,0,6.786698659261067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,128,1,float16,fp8,0,3.4117492039998374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,128,1,fp8,fp8,0,3.1789118448893228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,0,1,float16,float16,0,7.623525619506836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,128,1,float16,float16,0,1.9061333338419597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,128,1,float16,fp8,0,1.942911942799886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,0,1,fp8,fp8,0,6.824597040812175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,0,1,float16,float16,0,4.086698532104492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,0,1,float16,fp8,0,7.362288157145183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,128,1,fp8,fp8,0,1.838058630625407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,128,1,float16,float16,0,1.658906618754069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,0,1,fp8,fp8,0,3.6786880493164062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,128,1,float16,fp8,0,1.672111988067627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,0,1,float16,fp8,0,3.95849609375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,0,1,float16,float16,0,3.6421759923299155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,128,1,fp8,fp8,0,1.5425279935201008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,128,1,float16,float16,0,1.6606933275858562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,0,1,fp8,fp8,0,3.376943906148275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,0,1,float16,fp8,0,3.650202751159668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,128,1,float16,fp8,0,1.6758774121602376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,128,1,fp8,fp8,0,1.5488053957621257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,0,1,float16,float16,0,3.980090777079264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,128,1,float16,float16,0,1.6693120002746582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,0,1,float16,fp8,0,3.655845324198405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,0,1,fp8,fp8,0,3.3849493662516275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,128,1,float16,fp8,0,1.6859520276387532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,0,1,float16,float16,0,3.6595786412556968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,128,1,fp8,fp8,0,1.5615466435750325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,128,1,float16,float16,0,1.686303933461507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,0,1,float16,fp8,0,3.779674530029297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,0,1,fp8,fp8,0,3.3952480951944985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,128,1,float16,fp8,0,1.7033653259277344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,0,1,float16,float16,0,3.6750561396280923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,128,1,fp8,fp8,0,1.5783839225769043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,128,1,float16,float16,0,0.9818133513132731
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,128,1,float16,fp8,0,1.0030133724212646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,0,1,float16,float16,0,2.001882712046305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,0,1,float16,fp8,0,3.689903895060221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,0,1,fp8,fp8,0,3.414565404256185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,128,1,fp8,fp8,0,0.9514826933542887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,128,1,float16,float16,0,0.859877347946167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,0,1,float16,fp8,0,2.0959839820861816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,0,1,fp8,fp8,0,1.8818559646606445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,0,1,float16,float16,0,1.8624639511108398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,128,1,float16,fp8,0,0.8676160176595052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,128,1,fp8,fp8,0,0.8045439720153809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,128,1,float16,float16,0,0.8631146748860677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,0,1,float16,fp8,0,1.868351936340332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,0,1,fp8,fp8,0,1.734831968943278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,128,1,float16,fp8,0,0.8701120217641195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,0,1,float16,float16,0,1.8654826482137044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,128,1,fp8,fp8,0,0.8095093568166097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,128,1,float16,float16,0,0.867253303527832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,0,1,float16,fp8,0,1.8750826517740886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,0,1,fp8,fp8,0,1.738661289215088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,128,1,float16,fp8,0,0.8747146924336752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,0,1,float16,float16,0,1.872528076171875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,128,1,fp8,fp8,0,0.8143253326416016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,128,1,float16,float16,0,0.874176025390625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,0,1,float16,fp8,0,1.8789067268371582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,0,1,fp8,fp8,0,1.7422134081522624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,0,1,float16,float16,0,1.8825279871622722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,128,1,float16,fp8,0,0.8835786978403727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,128,1,fp8,fp8,0,0.8226719697316488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,128,1,float16,float16,0,0.5212746858596802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,0,1,float16,float16,0,1.0419200261433919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,128,1,float16,fp8,0,0.5327253341674805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,0,1,float16,fp8,0,1.8898720741271973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,0,1,fp8,fp8,0,1.7526987393697102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,128,1,fp8,fp8,0,0.5090346733729044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,128,1,float16,float16,0,0.4607199827829997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,0,1,fp8,fp8,0,0.9846879641215006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,0,1,float16,fp8,0,1.0541973114013672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,0,1,float16,float16,0,0.9749066829681396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,128,1,float16,fp8,0,0.46426133314768475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,128,1,fp8,fp8,0,0.4363093376159668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,128,1,float16,float16,0,0.46139200528462726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,0,1,float16,fp8,0,0.9784213701883951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,0,1,fp8,fp8,0,0.9130559762318929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,0,1,float16,float16,0,0.9769439697265625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,128,1,float16,fp8,0,0.46613868077596027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,128,1,fp8,fp8,0,0.4383946657180786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,128,1,float16,float16,0,0.4657866557439168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,0,1,float16,fp8,0,0.9803893566131592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,0,1,fp8,fp8,0,0.9141919612884521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,0,1,float16,float16,0,0.9800586700439453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,128,1,float16,fp8,0,0.46932268142700195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,128,1,fp8,fp8,0,0.440341313680013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,128,1,float16,float16,0,0.4686186710993449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,0,1,float16,fp8,0,0.9848906993865967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,0,1,fp8,fp8,0,0.9170080025990804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,128,1,float16,fp8,0,0.47303998470306396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,0,1,float16,float16,0,0.984112024307251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,128,1,fp8,fp8,0,0.44382933775583905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,128,1,float16,float16,0,0.2953439950942993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,0,1,float16,fp8,0,0.9909706910451254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,0,1,float16,float16,0,0.5665440162022909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,128,1,float16,fp8,0,0.30266133944193524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,0,1,fp8,fp8,0,0.9214666684468588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,128,1,fp8,fp8,0,0.2895413239796956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,0,1,float16,fp8,0,0.5746879975001017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,128,1,float16,float16,0,0.2592800060908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,0,1,fp8,fp8,0,0.5396159887313843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,128,1,float16,fp8,0,0.2622879942258199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,128,1,fp8,fp8,0,0.2529760003089905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,0,1,float16,float16,0,0.5301119883855184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,128,1,float16,float16,0,0.2611093322436015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,0,1,float16,fp8,0,0.5305066506067911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,0,1,fp8,fp8,0,0.5023733377456665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,128,1,float16,fp8,0,0.2621440092722575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,128,1,fp8,fp8,0,0.2529760003089905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,0,1,float16,float16,0,0.5313599904378256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,128,1,float16,float16,0,0.26290132602055866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,0,1,fp8,fp8,0,0.503162662188212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,0,1,float16,fp8,0,0.5312906503677368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,128,1,float16,fp8,0,0.2644159992535909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,128,1,fp8,fp8,0,0.25474133094151813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,0,1,float16,float16,0,0.5333546797434489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,128,1,float16,float16,0,0.2669653296470642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,0,1,fp8,fp8,0,0.5032853285471598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,0,1,float16,fp8,0,0.5349813302357992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,128,1,float16,fp8,0,0.2688213388125102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,128,1,fp8,fp8,0,0.25732799371083576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,0,1,float16,float16,0,0.5385333299636841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,128,1,float16,float16,0,0.21011199553807577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,0,1,fp8,fp8,0,0.5074506600697836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,0,1,float16,fp8,0,0.5389066537221273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,128,1,float16,fp8,0,0.2092906634012858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,128,1,fp8,fp8,0,0.19994133710861206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,0,1,float16,float16,0,0.36211200555165607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,128,1,float16,float16,0,0.2040533423423767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,0,1,fp8,fp8,0,0.3367733160654704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,0,1,float16,fp8,0,0.36177066961924237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,128,1,fp8,fp8,0,0.19568000237147012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,128,1,float16,fp8,0,0.20582934220631918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,0,1,float16,float16,0,0.3569386800130208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,128,1,float16,float16,0,0.20382932821909586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,0,1,fp8,fp8,0,0.33393601576487225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,0,1,float16,fp8,0,0.3553973436355591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,128,1,float16,fp8,0,0.20576000213623047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,128,1,fp8,fp8,0,0.19690134127934775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,0,1,float16,float16,0,0.35755733648935956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,128,1,float16,float16,0,0.20546134312947592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,0,1,fp8,fp8,0,0.3351946671803792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,0,1,float16,fp8,0,0.3537493149439494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,0,1,float16,float16,0,0.3588373263676961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,128,1,float16,fp8,0,0.20568533738454184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,128,1,fp8,fp8,0,0.19733333587646484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,0,1,float16,fp8,0,0.3543999989827474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,0,1,fp8,fp8,0,0.3352533181508382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,128,1,float16,float16,0,0.20595733324686685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,0,1,float16,float16,0,0.35893865426381427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,128,1,float16,fp8,0,0.203439990679423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,128,1,fp8,fp8,0,0.19763733943303427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,0,1,float16,fp8,0,0.35682666301727295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,0,1,fp8,fp8,0,0.33529067039489746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,128,1,float16,float16,0,2.4454453786214194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,128,1,float16,fp8,0,2.467034657796224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,128,1,fp8,fp8,0,2.2732213338216147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,0,1,float16,float16,0,4.537503878275554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,128,1,float16,float16,0,2.4611520767211914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,0,1,fp8,fp8,0,4.212106704711914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,0,1,float16,fp8,0,4.561557451883952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,128,1,float16,fp8,0,2.4814453125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,0,1,float16,float16,0,4.551397323608398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,128,1,fp8,fp8,0,2.2988160451253257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,0,1,float16,fp8,0,4.573648134867351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,128,1,float16,float16,0,2.4750986099243164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,0,1,fp8,fp8,0,4.238256136576335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,0,1,float16,float16,0,4.570597330729167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,128,1,fp8,fp8,0,2.314847946166992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,128,1,float16,fp8,0,2.4974560737609863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,0,1,float16,fp8,0,4.592170715332031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,128,1,float16,float16,0,2.500607967376709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,0,1,fp8,fp8,0,4.252991994222005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,0,1,float16,float16,0,4.60148811340332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,128,1,fp8,fp8,0,2.3409172693888345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,128,1,float16,fp8,0,2.521887938181559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,128,1,float16,float16,0,1.4293759663899739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,0,1,float16,float16,0,2.505253314971924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,128,1,float16,fp8,0,1.4563147226969402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,0,1,float16,fp8,0,4.620416005452474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,0,1,fp8,fp8,0,4.280218760172526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,128,1,fp8,fp8,0,1.377914587656657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,128,1,float16,float16,0,1.2455466588338215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,0,1,float16,fp8,0,2.5325546264648438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,0,1,fp8,fp8,0,2.3585813840230307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,0,1,float16,float16,0,2.3005174001057944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,128,1,float16,fp8,0,1.256170670191447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,128,1,fp8,fp8,0,1.159765323003133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,128,1,float16,float16,0,1.2497493426005046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,0,1,float16,fp8,0,2.3089386622111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,0,1,fp8,fp8,0,2.138026714324951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,0,1,float16,float16,0,2.3059520721435547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,128,1,float16,fp8,0,1.260538657506307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,128,1,fp8,fp8,0,1.1658133665720622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,128,1,float16,float16,0,1.2550293604532878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,0,1,float16,fp8,0,2.3173227310180664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,0,1,fp8,fp8,0,2.1437172889709473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,0,1,float16,float16,0,2.313450654347738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,128,1,float16,fp8,0,1.2672053178151448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,128,1,fp8,fp8,0,1.1738560199737549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,128,1,float16,float16,0,1.2661173343658447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,0,1,float16,fp8,0,2.3245760599772134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,0,1,fp8,fp8,0,2.14955202738444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,128,1,float16,fp8,0,1.2802613576253254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,128,1,fp8,fp8,0,1.187285343805949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,0,1,float16,float16,0,2.3269492785135903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,128,1,float16,float16,0,0.7386506398518881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,0,1,fp8,fp8,0,2.1651840209960938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,0,1,float16,fp8,0,2.34005339940389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,128,1,float16,fp8,0,0.7571360270182291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,128,1,fp8,fp8,0,0.7158613204956055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,0,1,float16,float16,0,1.2857866287231445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,0,1,float16,fp8,0,1.3046027024586995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,128,1,float16,float16,0,0.6478240092595419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,0,1,fp8,fp8,0,1.2171626885732014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,0,1,float16,float16,0,1.1857759952545166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,128,1,fp8,fp8,0,0.6091946760813395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,128,1,float16,fp8,0,0.6552693446477255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,128,1,float16,float16,0,0.6519360144933065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,0,1,fp8,fp8,0,1.1058613459269206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,0,1,float16,fp8,0,1.190282662709554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,0,1,float16,float16,0,1.189621369043986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,128,1,float16,fp8,0,0.6560266812642416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,128,1,fp8,fp8,0,0.6112000147501627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,0,1,float16,fp8,0,1.193050702412923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,128,1,float16,float16,0,0.6539520025253296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,0,1,fp8,fp8,0,1.109328031539917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,0,1,float16,float16,0,1.1915199756622314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,128,1,float16,fp8,0,0.6610986789067587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,128,1,fp8,fp8,0,0.6146293481191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,128,1,float16,float16,0,0.6602720022201538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,0,1,float16,fp8,0,1.199893315633138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,0,1,fp8,fp8,0,1.112287998199463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,0,1,float16,float16,0,1.19759996732076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,128,1,float16,fp8,0,0.6665013233820597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,128,1,fp8,fp8,0,0.6217706600824991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,128,1,float16,float16,0,0.3955039978027344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,0,1,float16,fp8,0,1.2065920035044353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,0,1,float16,float16,0,0.6805866559346517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,0,1,fp8,fp8,0,1.1188639799753826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,128,1,float16,fp8,0,0.40531734625498456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,128,1,fp8,fp8,0,0.38604267438252765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,128,1,float16,float16,0,0.34883733590443927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,0,1,float16,fp8,0,0.688981294631958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,0,1,fp8,fp8,0,0.6437439918518066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,0,1,float16,float16,0,0.6285013357798258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,128,1,float16,fp8,0,0.353221337000529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,128,1,fp8,fp8,0,0.33177600304285687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,0,1,float16,fp8,0,0.6297386487325033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,128,1,float16,float16,0,0.35026665528615314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,0,1,fp8,fp8,0,0.5887626806894938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,128,1,float16,fp8,0,0.35304001967112225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,0,1,float16,float16,0,0.6272159814834595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,128,1,fp8,fp8,0,0.33423467477162677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,128,1,float16,float16,0,0.353279987970988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,0,1,float16,fp8,0,0.6300106843312582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,0,1,fp8,fp8,0,0.5906399885813395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,0,1,float16,float16,0,0.6303733189900717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,128,1,float16,fp8,0,0.35638399918874103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,128,1,fp8,fp8,0,0.33488531907399494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,0,1,float16,fp8,0,0.6330346663792928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,128,1,float16,float16,0,0.3557066520055135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,0,1,fp8,fp8,0,0.5925013224283854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,0,1,float16,float16,0,0.6337386767069498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,128,1,float16,fp8,0,0.3593120177586873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,128,1,fp8,fp8,0,0.3387840191523234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,128,1,float16,float16,0,0.2262880007425944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,0,1,float16,fp8,0,0.6380373239517212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,0,1,fp8,fp8,0,0.5959306557973226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,0,1,float16,float16,0,0.37644799550374347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,128,1,float16,fp8,0,0.23236799240112305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,128,1,fp8,fp8,0,0.2220319906870524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,0,1,float16,fp8,0,0.38317867120107013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,0,1,fp8,fp8,0,0.3593440055847168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,128,1,float16,float16,0,0.19961599508921304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,0,1,float16,float16,0,0.346127986907959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,128,1,float16,fp8,0,0.19962666432062784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,128,1,fp8,fp8,0,0.19330666462580362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,0,1,float16,fp8,0,0.3471733331680298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,128,1,float16,float16,0,0.1994719902674357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,0,1,fp8,fp8,0,0.33026667435963947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,128,1,float16,fp8,0,0.19976532459259033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,0,1,float16,float16,0,0.34699201583862305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,128,1,fp8,fp8,0,0.19327465693155924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,0,1,float16,fp8,0,0.3476373354593913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,128,1,float16,float16,0,0.19948800404866537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,0,1,fp8,fp8,0,0.33077865839004517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,0,1,float16,float16,0,0.34727466106414795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,128,1,float16,fp8,0,0.2017973264058431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,128,1,fp8,fp8,0,0.19558932383855185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,0,1,float16,fp8,0,0.34924264748891193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,128,1,float16,float16,0,0.20155733823776245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,0,1,fp8,fp8,0,0.33054399490356445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,0,1,float16,float16,0,0.3489973147710164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,128,1,float16,fp8,0,0.2044693430264791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,128,1,fp8,fp8,0,0.1988053321838379
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,0,1,float16,fp8,0,0.3538293441136678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,128,1,float16,float16,0,0.1632426679134369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,0,1,fp8,fp8,0,0.3331200083096822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,128,1,float16,fp8,0,0.162773331006368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,0,1,float16,float16,0,0.24751466512680054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,128,1,fp8,fp8,0,0.15500799814860025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,128,1,float16,float16,0,0.15852800011634827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,0,1,float16,fp8,0,0.2467306653658549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,0,1,fp8,fp8,0,0.2307466665903727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,128,1,float16,fp8,0,0.15954132874806723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,128,1,fp8,fp8,0,0.15204800168673197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,0,1,float16,float16,0,0.24445333083470663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,128,1,float16,float16,0,0.1597866714000702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,0,1,float16,fp8,0,0.2435093323389689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,0,1,fp8,fp8,0,0.2284640073776245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,128,1,float16,fp8,0,0.16059199968973795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,128,1,fp8,fp8,0,0.15265599886576334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,0,1,float16,float16,0,0.24314665794372559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,128,1,float16,float16,0,0.15863999724388123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,0,1,float16,fp8,0,0.24314665794372559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,0,1,fp8,fp8,0,0.23029333353042603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,128,1,float16,fp8,0,0.15998933712641397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,128,1,fp8,fp8,0,0.15241066614786783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,0,1,float16,float16,0,0.24450665712356567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,128,1,float16,float16,0,0.16010666886965433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,0,1,fp8,fp8,0,0.22829866409301758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,0,1,float16,fp8,0,0.24499734242757162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,128,1,float16,fp8,0,0.16037333011627197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,128,1,fp8,fp8,0,0.15230400363604227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,0,1,float16,float16,0,0.24414400259653726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,0,1,fp8,fp8,0,0.228928009668986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,0,1,float16,fp8,0,0.24556267261505127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,128,1,float16,float16,0,3.271002769470215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,128,1,float16,fp8,0,3.291738510131836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,0,1,float16,float16,0,4.909232139587402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,128,1,fp8,fp8,0,3.0291360219319663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,0,1,fp8,fp8,0,4.545877456665039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,0,1,float16,fp8,0,4.933269182840983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,128,1,float16,float16,0,3.318101247151693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,0,1,float16,float16,0,4.962805430094401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,128,1,fp8,fp8,0,3.054666519165039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,128,1,float16,fp8,0,3.3235947291056314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,0,1,float16,fp8,0,4.961989402770996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,128,1,float16,float16,0,3.318661371866862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,0,1,fp8,fp8,0,4.571306546529134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,0,1,float16,float16,0,4.957930564880371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,128,1,float16,fp8,0,3.3397865295410156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,128,1,fp8,fp8,0,3.0739307403564453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,128,1,float16,float16,0,3.377920150756836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,0,1,float16,fp8,0,4.982831954956055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,0,1,fp8,fp8,0,4.590448061625163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,0,1,float16,float16,0,5.02564271291097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,128,1,float16,fp8,0,3.387770652770996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,128,1,fp8,fp8,0,3.11189874013265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,128,1,float16,float16,0,1.8851680755615234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,0,1,float16,fp8,0,5.037130673726399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,0,1,float16,float16,0,2.7346506118774414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,0,1,fp8,fp8,0,4.627999941507976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,128,1,float16,fp8,0,1.9103573163350422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,128,1,fp8,fp8,0,1.810373306274414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,0,1,float16,fp8,0,2.761685371398926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,128,1,float16,float16,0,1.6268906593322754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,0,1,fp8,fp8,0,2.5794347127278647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,0,1,float16,float16,0,2.4520692825317383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,128,1,fp8,fp8,0,1.5112373034159343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,128,1,float16,fp8,0,1.6419199307759602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,0,1,fp8,fp8,0,2.275066693623861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,128,1,float16,float16,0,1.635248025258382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,0,1,float16,fp8,0,2.4655946095784507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,0,1,float16,float16,0,2.4612906773885093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,128,1,fp8,fp8,0,1.521557331085205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,128,1,float16,fp8,0,1.6489920616149902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,0,1,float16,fp8,0,2.4736000696818032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,0,1,fp8,fp8,0,2.285136063893636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,128,1,float16,float16,0,1.643349329630534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,0,1,float16,float16,0,2.4711573918660483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,128,1,float16,fp8,0,1.6584374109903972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,128,1,fp8,fp8,0,1.5338133176167805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,0,1,float16,fp8,0,2.4856640497843423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,128,1,float16,float16,0,1.6600106557210286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,0,1,fp8,fp8,0,2.295754591623942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,0,1,float16,float16,0,2.489210605621338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,128,1,float16,fp8,0,1.6734347343444824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,128,1,fp8,fp8,0,1.5485493342081706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,128,1,float16,float16,0,0.9559840361277262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,0,1,float16,fp8,0,2.504960060119629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,0,1,float16,float16,0,1.3871893882751465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,0,1,fp8,fp8,0,2.313802719116211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,128,1,float16,fp8,0,0.9756266276041666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,128,1,fp8,fp8,0,0.9230133692423502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,0,1,float16,fp8,0,1.4069172541300456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,128,1,float16,float16,0,0.8338879744211832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,0,1,fp8,fp8,0,1.312986691792806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,0,1,float16,float16,0,1.2511253356933594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,128,1,fp8,fp8,0,0.7774933179219564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,128,1,float16,fp8,0,0.8413919607798258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,0,1,float16,fp8,0,1.2585439682006836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,0,1,fp8,fp8,0,1.1632213592529297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,128,1,float16,float16,0,0.8369440237681071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,0,1,float16,float16,0,1.2542826334635417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,128,1,float16,fp8,0,0.8452533086140951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,128,1,fp8,fp8,0,0.7811786333719889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,0,1,float16,fp8,0,1.2625386714935303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,128,1,float16,float16,0,0.8405173619588217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,0,1,fp8,fp8,0,1.170080025990804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,0,1,float16,float16,0,1.2608426411946614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,128,1,float16,fp8,0,0.849776029586792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,128,1,fp8,fp8,0,0.7886666456858317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,128,1,float16,float16,0,0.8491466840108236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,0,1,float16,fp8,0,1.2698400020599365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,0,1,fp8,fp8,0,1.1743253072102864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,128,1,fp8,fp8,0,0.7956799666086832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,128,1,float16,fp8,0,0.8578399817148844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,0,1,float16,float16,0,1.270469347635905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,128,1,float16,float16,0,0.5000853141148885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,0,1,fp8,fp8,0,1.183008035024007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,0,1,float16,fp8,0,1.2783839702606201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,0,1,float16,float16,0,0.7215147018432617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,128,1,float16,fp8,0,0.5108213424682617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,128,1,fp8,fp8,0,0.4842933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,128,1,float16,float16,0,0.4353119929631551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,0,1,float16,fp8,0,0.7332800229390463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,0,1,fp8,fp8,0,0.6847733656565348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,0,1,float16,float16,0,0.6512266794840494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,128,1,fp8,fp8,0,0.41281068325042725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,128,1,float16,fp8,0,0.4392319917678833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,0,1,fp8,fp8,0,0.6100533405939738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,0,1,float16,fp8,0,0.6542506615320841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,128,1,float16,float16,0,0.4370773235956828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,0,1,float16,float16,0,0.6529653469721476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,128,1,float16,fp8,0,0.44164268175760907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,128,1,fp8,fp8,0,0.41281068325042725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,0,1,float16,fp8,0,0.6576266686121622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,0,1,fp8,fp8,0,0.6115039984385172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,128,1,float16,float16,0,0.43981866041819256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,0,1,float16,float16,0,0.656005342801412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,128,1,float16,fp8,0,0.446122686068217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,128,1,fp8,fp8,0,0.41674665609995526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,0,1,float16,fp8,0,0.6605493227640787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,0,1,fp8,fp8,0,0.6160426537195841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,128,1,float16,float16,0,0.44491732120513916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,0,1,float16,float16,0,0.6610186497370402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,128,1,float16,fp8,0,0.44917333126068115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,128,1,fp8,fp8,0,0.42180800437927246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,0,1,float16,fp8,0,0.666586677233378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,128,1,float16,float16,0,0.26915733019510907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,0,1,fp8,fp8,0,0.6182239850362142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,0,1,float16,float16,0,0.38629865646362305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,128,1,float16,fp8,0,0.2757493257522583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,128,1,fp8,fp8,0,0.2651679913202922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,0,1,float16,fp8,0,0.3922613461812337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,0,1,fp8,fp8,0,0.368938684463501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,128,1,float16,float16,0,0.23368000984191895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,0,1,float16,float16,0,0.34726401170094806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,128,1,float16,fp8,0,0.23657600084940592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,128,1,fp8,fp8,0,0.2262453238169352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,0,1,float16,fp8,0,0.3493653138478597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,0,1,fp8,fp8,0,0.33081066608428955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,128,1,float16,float16,0,0.23484265804290771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,128,1,float16,fp8,0,0.23776533206303915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,0,1,float16,float16,0,0.3470666805903117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,128,1,fp8,fp8,0,0.22822932402292886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,128,1,float16,float16,0,0.2368639906247457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,0,1,float16,fp8,0,0.3515626589457194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,0,1,fp8,fp8,0,0.33078932762145996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,0,1,float16,float16,0,0.34915733337402344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,128,1,float16,fp8,0,0.2401706576347351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,128,1,fp8,fp8,0,0.23034665981928507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,0,1,float16,fp8,0,0.35156798362731934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,0,1,fp8,fp8,0,0.33349335193634033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,128,1,float16,float16,0,0.24114133914311728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,0,1,float16,float16,0,0.35288000106811523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,128,1,float16,fp8,0,0.24304533004760742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,128,1,fp8,fp8,0,0.23228265841801962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,0,1,float16,fp8,0,0.3577599922815959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,128,1,float16,float16,0,0.15431466698646545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,0,1,fp8,fp8,0,0.3348160187403361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,0,1,float16,float16,0,0.2208906610806783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,128,1,float16,fp8,0,0.15785066286722818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,128,1,fp8,fp8,0,0.1539306640625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,0,1,float16,fp8,0,0.22288533051808676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,128,1,float16,float16,0,0.1344586710135142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,0,1,fp8,fp8,0,0.21189866463343301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,0,1,float16,float16,0,0.19867199659347534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,128,1,float16,fp8,0,0.1346453328927358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,128,1,fp8,fp8,0,0.12782399853070578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,0,1,float16,fp8,0,0.19927465915679932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,0,1,fp8,fp8,0,0.1860640048980713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,128,1,float16,float16,0,0.1358453333377838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,0,1,float16,float16,0,0.20021865765253702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,128,1,float16,fp8,0,0.13587733109792074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,128,1,fp8,fp8,0,0.12797866264979044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,0,1,float16,fp8,0,0.1994240085283915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,128,1,float16,float16,0,0.13481066624323526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,0,1,fp8,fp8,0,0.18740799029668173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,0,1,float16,float16,0,0.19939732551574707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,128,1,float16,fp8,0,0.1360319952170054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,128,1,fp8,fp8,0,0.1297866702079773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,0,1,float16,fp8,0,0.20053333044052124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,128,1,float16,float16,0,0.13616533080736795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,0,1,fp8,fp8,0,0.18705066045125326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,0,1,float16,float16,0,0.19967466592788696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,128,1,float16,fp8,0,0.13806399703025818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,128,1,fp8,fp8,0,0.13183466593424478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,128,1,float16,float16,0,0.11267200112342834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,0,1,fp8,fp8,0,0.19121599197387695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,0,1,float16,fp8,0,0.20154666900634766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,0,1,float16,float16,0,0.14842666188875833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,128,1,fp8,fp8,0,0.10939733187357585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,128,1,float16,fp8,0,0.1134986678759257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,128,1,float16,float16,0,0.11130133271217346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,0,1,fp8,fp8,0,0.14257599910100302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,0,1,float16,fp8,0,0.14918399850527445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,0,1,float16,float16,0,0.147189329067866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,128,1,float16,fp8,0,0.11326400438944499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,128,1,fp8,fp8,0,0.10755200187365214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,0,1,float16,fp8,0,0.1474186678727468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,128,1,float16,float16,0,0.11180800199508667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,0,1,fp8,fp8,0,0.1402293344338735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,128,1,float16,fp8,0,0.11218667030334473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,128,1,fp8,fp8,0,0.10731732845306396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,0,1,float16,float16,0,0.14680533607800803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,128,1,float16,float16,0,0.1128053367137909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,0,1,float16,fp8,0,0.1465173363685608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,0,1,fp8,fp8,0,0.1402773360411326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,128,1,float16,fp8,0,0.11181333661079407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,128,1,fp8,fp8,0,0.10758933424949646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,0,1,float16,float16,0,0.14656000336011252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,0,1,float16,fp8,0,0.14891200264294943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,0,1,fp8,fp8,0,0.1402720014254252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,128,1,float16,float16,0,0.11168000102043152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,128,1,float16,fp8,0,0.11153599619865417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,128,1,fp8,fp8,0,0.1076853374640147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,0,1,float16,float16,0,0.1483733355998993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,0,1,float16,fp8,0,0.14842133720715842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,0,1,fp8,fp8,0,0.14034666617711386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,128,1,float16,float16,0,2.432000001271566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,128,1,float16,fp8,0,2.4423413276672363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,0,1,float16,float16,0,3.238784154256185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,128,1,fp8,fp8,0,2.2491466204325357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,0,1,float16,fp8,0,3.249333381652832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,128,1,float16,float16,0,2.4592960675557456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,0,1,fp8,fp8,0,2.9929866790771484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,0,1,float16,float16,0,3.2661174138387046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,128,1,float16,fp8,0,2.4668639500935874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,128,1,fp8,fp8,0,2.2674454053243003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,0,1,float16,fp8,0,3.277344067891439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,0,1,fp8,fp8,0,3.0124588012695312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,128,1,float16,float16,0,2.484442710876465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,0,1,float16,float16,0,3.2930078506469727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,128,1,float16,fp8,0,2.486442724863688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,128,1,fp8,fp8,0,2.284554640452067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,0,1,float16,fp8,0,3.297578811645508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,128,1,float16,float16,0,2.503434658050537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,0,1,fp8,fp8,0,3.031797409057617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,0,1,float16,float16,0,3.315103848775228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,128,1,float16,fp8,0,2.506687959035238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,128,1,fp8,fp8,0,2.307722727457682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,0,1,float16,fp8,0,3.3180373509724936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,128,1,float16,float16,0,1.4158132870992024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,0,1,float16,float16,0,1.83734925587972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,0,1,fp8,fp8,0,3.055818557739258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,128,1,float16,fp8,0,1.4335519472757976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,128,1,fp8,fp8,0,1.3560585975646973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,0,1,float16,fp8,0,1.8598507245381672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,128,1,float16,float16,0,1.2195253372192383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,0,1,fp8,fp8,0,1.7366453806559246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,0,1,float16,float16,0,1.626442591349284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,128,1,float16,fp8,0,1.230463981628418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,128,1,fp8,fp8,0,1.133893330891927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,0,1,fp8,fp8,0,1.5100746154785156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,0,1,float16,fp8,0,1.637407938639323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,128,1,float16,float16,0,1.226960023244222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,0,1,float16,float16,0,1.635493278503418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,128,1,fp8,fp8,0,1.1414559682210286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,128,1,float16,fp8,0,1.236997365951538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,0,1,float16,fp8,0,1.6470880508422852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,0,1,fp8,fp8,0,1.5189119974772136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,128,1,float16,float16,0,1.2345866362253826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,0,1,float16,float16,0,1.643994649251302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,128,1,float16,fp8,0,1.2449653148651123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,128,1,fp8,fp8,0,1.1500266393025715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,0,1,float16,fp8,0,1.6560427347819011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,0,1,fp8,fp8,0,1.5274826685587566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,128,1,float16,float16,0,1.2445226510365803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,0,1,float16,float16,0,1.6554560661315918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,128,1,float16,fp8,0,1.258512020111084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,128,1,fp8,fp8,0,1.1632906595865886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,128,1,float16,float16,0,0.7238293488820394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,0,1,float16,fp8,0,1.6686347325642903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,0,1,float16,float16,0,0.937877337137858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,0,1,fp8,fp8,0,1.5384213129679363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,128,1,float16,fp8,0,0.7361706892649332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,128,1,fp8,fp8,0,0.6965013345082601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,0,1,float16,fp8,0,0.9529173374176025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,0,1,fp8,fp8,0,0.8899733225504557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,128,1,float16,float16,0,0.627290685971578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,0,1,float16,float16,0,0.8346613248189291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,128,1,fp8,fp8,0,0.5858986775080363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,128,1,float16,fp8,0,0.6333706776301066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,0,1,float16,fp8,0,0.8408426443735758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,0,1,fp8,fp8,0,0.777184009552002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,128,1,float16,float16,0,0.6304959853490194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,128,1,float16,fp8,0,0.6363893349965414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,0,1,float16,float16,0,0.8412480354309082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,128,1,fp8,fp8,0,0.5911733309427897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,0,1,fp8,fp8,0,0.7828959623972574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,128,1,float16,float16,0,0.6339946587880453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,0,1,float16,fp8,0,0.8447893460591634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,128,1,fp8,fp8,0,0.5947680075963339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,128,1,float16,fp8,0,0.6402560075124105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,0,1,float16,float16,0,0.8440852959950765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,0,1,fp8,fp8,0,0.7852906386057535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,0,1,float16,fp8,0,0.8494613170623779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,128,1,float16,float16,0,0.6395946741104126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,128,1,fp8,fp8,0,0.6004480123519897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,128,1,float16,fp8,0,0.6454186836878458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,0,1,float16,float16,0,0.8483146826426188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,128,1,float16,float16,0,0.3787200053532918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,0,1,fp8,fp8,0,0.7936906814575195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,0,1,float16,fp8,0,0.8574240207672119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,0,1,float16,float16,0,0.49033598105112713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,128,1,float16,fp8,0,0.38794668515523273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,128,1,fp8,fp8,0,0.3675893147786458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,0,1,float16,fp8,0,0.4992693265279134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,128,1,float16,float16,0,0.3280160029729207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,0,1,fp8,fp8,0,0.46776533126831055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,128,1,float16,fp8,0,0.3312586744626363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,0,1,float16,float16,0,0.4355146487553914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,128,1,fp8,fp8,0,0.3139359951019287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,0,1,float16,fp8,0,0.4397013187408447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,0,1,fp8,fp8,0,0.41068800290425617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,128,1,float16,float16,0,0.3285813331604004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,0,1,float16,float16,0,0.437610665957133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,128,1,float16,fp8,0,0.33369600772857666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,128,1,fp8,fp8,0,0.3141706585884094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,0,1,float16,fp8,0,0.43908266226450604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,0,1,fp8,fp8,0,0.4147679805755615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,128,1,float16,float16,0,0.33322133620580036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,0,1,float16,float16,0,0.4412320057551066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,128,1,float16,fp8,0,0.3364906708399455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,128,1,fp8,fp8,0,0.3165866732597351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,0,1,float16,fp8,0,0.44328534603118896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,128,1,float16,float16,0,0.33658134937286377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,0,1,fp8,fp8,0,0.4152853488922119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,0,1,float16,float16,0,0.4437119960784912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,128,1,fp8,fp8,0,0.32071467240651447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,128,1,float16,fp8,0,0.33949331442515057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,0,1,float16,fp8,0,0.4480160077412923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,0,1,fp8,fp8,0,0.4203039805094401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,128,1,float16,float16,0,0.20608532428741455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,0,1,float16,float16,0,0.2661280035972595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,128,1,float16,fp8,0,0.21182932456334433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,128,1,fp8,fp8,0,0.20273600021998087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,0,1,float16,fp8,0,0.2716640035311381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,128,1,float16,float16,0,0.17705599466959634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,0,1,fp8,fp8,0,0.2568639914194743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,0,1,float16,float16,0,0.2350239952405294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,128,1,float16,fp8,0,0.1781866749127706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,128,1,fp8,fp8,0,0.17284266153971353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,0,1,float16,fp8,0,0.23662400245666504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,0,1,fp8,fp8,0,0.22671467065811157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,128,1,float16,float16,0,0.17691200971603394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,0,1,float16,float16,0,0.2350133260091146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,128,1,float16,fp8,0,0.17924267053604126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,128,1,fp8,fp8,0,0.17313067118326822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,0,1,float16,fp8,0,0.23640000820159912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,0,1,fp8,fp8,0,0.22636799017588297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,128,1,float16,float16,0,0.17882132530212402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,0,1,float16,float16,0,0.23624000946680704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,128,1,float16,fp8,0,0.18062400817871094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,128,1,fp8,fp8,0,0.17476266622543335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,0,1,float16,fp8,0,0.23727466662724814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,128,1,float16,float16,0,0.17989333470662436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,0,1,fp8,fp8,0,0.22934399048487344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,0,1,float16,float16,0,0.23810666799545288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,128,1,float16,fp8,0,0.18290666739145914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,128,1,fp8,fp8,0,0.17704532543818155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,0,1,float16,fp8,0,0.24173333247502646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,128,1,float16,float16,0,0.11993066469828288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,0,1,fp8,fp8,0,0.23061867554982504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,0,1,float16,float16,0,0.15252799789110819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,128,1,float16,fp8,0,0.12174399693806966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,128,1,fp8,fp8,0,0.12133866548538208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,0,1,float16,fp8,0,0.15616533160209656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,0,1,fp8,fp8,0,0.15058666467666626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,128,1,float16,float16,0,0.10618666807810466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,0,1,float16,float16,0,0.1381386617819468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,128,1,float16,fp8,0,0.10742933551470439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,128,1,fp8,fp8,0,0.10124799609184265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,0,1,float16,fp8,0,0.13922666509946188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,0,1,fp8,fp8,0,0.1297866702079773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,128,1,float16,float16,0,0.10718933741251628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,0,1,float16,float16,0,0.13823999961217245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,128,1,float16,fp8,0,0.10646399855613708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,128,1,fp8,fp8,0,0.0993226667245229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,0,1,float16,fp8,0,0.1395680010318756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,128,1,float16,float16,0,0.10730133454004924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,0,1,fp8,fp8,0,0.12989866733551025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,0,1,float16,float16,0,0.13805866241455078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,128,1,float16,fp8,0,0.1074133316675822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,128,1,fp8,fp8,0,0.10142933328946431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,0,1,float16,fp8,0,0.14019200205802917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,0,1,fp8,fp8,0,0.13061333696047464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,128,1,float16,float16,0,0.1072746713956197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,0,1,float16,float16,0,0.13831999897956848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,128,1,float16,fp8,0,0.10776533683141072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,128,1,fp8,fp8,0,0.10259200135866801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,0,1,float16,fp8,0,0.1388266682624817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,0,1,fp8,fp8,0,0.13277332981427512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,128,1,float16,float16,0,0.08684266606966655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,0,1,float16,float16,0,0.10738666852315266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,128,1,float16,fp8,0,0.08714666962623596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,128,1,fp8,fp8,0,0.08302400012811025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,0,1,float16,fp8,0,0.1074026624361674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,0,1,fp8,fp8,0,0.10250133275985718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,128,1,float16,float16,0,0.08682666222254436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,0,1,float16,float16,0,0.1088053286075592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,128,1,float16,fp8,0,0.0869760016600291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,128,1,fp8,fp8,0,0.08304533362388611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,0,1,float16,fp8,0,0.10854400197664897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,0,1,fp8,fp8,0,0.10317333539326985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,128,1,float16,float16,0,0.08842666943868001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,0,1,float16,float16,0,0.10763200124104817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,128,1,float16,fp8,0,0.08689600229263306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,128,1,fp8,fp8,0,0.08475733796755473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,0,1,float16,fp8,0,0.10733333230018616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,0,1,fp8,fp8,0,0.1034453312555949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,128,1,float16,float16,0,0.08664000034332275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,0,1,float16,float16,0,0.10786666472752889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,128,1,float16,fp8,0,0.08708799878756206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,128,1,fp8,fp8,0,0.08303466439247131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,0,1,float16,fp8,0,0.10911466677983601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,0,1,fp8,fp8,0,0.10312533378601074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,128,1,float16,float16,0,0.087226668993632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,0,1,float16,float16,0,0.1083573301633199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,128,1,float16,fp8,0,0.08690133690834045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,128,1,fp8,fp8,0,0.08480532964070638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,0,1,float16,fp8,0,0.10762133200963338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,0,1,fp8,fp8,0,0.1032319962978363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,128,1,float16,float16,0,2.929178555806478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,128,1,float16,fp8,0,2.90665594736735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,0,1,float16,float16,0,3.4496053059895835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,128,1,fp8,fp8,0,2.8668746948242188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,0,1,fp8,fp8,0,3.3476905822753906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,0,1,float16,fp8,0,3.429562568664551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,128,1,float16,float16,0,2.945647875467936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,0,1,float16,float16,0,3.474191983540853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,128,1,float16,fp8,0,2.9335412979125977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,128,1,fp8,fp8,0,2.950965245564779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,0,1,float16,fp8,0,3.457493464152018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,128,1,float16,float16,0,3.0391785303751626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,0,1,fp8,fp8,0,3.4334239959716797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,128,1,float16,fp8,0,2.9748799006144204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,0,1,float16,float16,0,3.5711841583251953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,128,1,fp8,fp8,0,2.9600960413614907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,0,1,float16,fp8,0,3.5383361180623374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,128,1,float16,float16,0,3.1321067810058594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,0,1,fp8,fp8,0,3.455135981241862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,0,1,float16,float16,0,3.657840092976888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,128,1,float16,fp8,0,3.0663201014200845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,128,1,fp8,fp8,0,3.1423091888427734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,0,1,float16,fp8,0,3.601674715677897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,128,1,float16,float16,0,1.614693323771159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,0,1,float16,float16,0,1.9207946459452312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,128,1,float16,fp8,0,1.6098613739013672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,0,1,fp8,fp8,0,3.6315733591715493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,128,1,fp8,fp8,0,1.631765365600586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,0,1,float16,fp8,0,1.8851307233174641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,0,1,fp8,fp8,0,1.8720533053080242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,128,1,float16,float16,0,1.4677972793579102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,0,1,float16,float16,0,1.739173412322998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,128,1,float16,fp8,0,1.4615146319071453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,128,1,fp8,fp8,0,1.4131253560384114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,0,1,float16,fp8,0,1.727359930674235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,0,1,fp8,fp8,0,1.667850653330485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,128,1,float16,float16,0,1.4785653750101726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,0,1,float16,float16,0,1.7438133557637532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,128,1,float16,fp8,0,1.4734880129496257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,128,1,fp8,fp8,0,1.4388960202534993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,0,1,float16,fp8,0,1.7401439348856609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,0,1,fp8,fp8,0,1.680341402689616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,128,1,float16,float16,0,1.4826134045918782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,0,1,float16,float16,0,1.7499146461486816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,128,1,float16,fp8,0,1.4805760383605957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,128,1,fp8,fp8,0,1.4431840578715007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,0,1,float16,fp8,0,1.7501120567321777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,0,1,fp8,fp8,0,1.6922133763631184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,128,1,float16,float16,0,1.5364960034688313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,0,1,float16,float16,0,1.7717493375142415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,128,1,float16,fp8,0,1.4973012606302898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,128,1,fp8,fp8,0,1.5740639368693035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,0,1,float16,fp8,0,1.7659519513448079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,128,1,float16,float16,0,0.7965280214945475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,0,1,fp8,fp8,0,1.8132692972819011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,0,1,float16,float16,0,0.9446346759796143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,128,1,float16,fp8,0,0.7875733375549316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,128,1,fp8,fp8,0,0.8187306722005209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,0,1,float16,fp8,0,0.9290133317311605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,0,1,fp8,fp8,0,0.9527626832326254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,128,1,float16,float16,0,0.7442293167114258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,0,1,float16,float16,0,0.8774080276489258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,128,1,float16,fp8,0,0.741429328918457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,128,1,fp8,fp8,0,0.703285296758016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,0,1,float16,fp8,0,0.8752266565958658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,0,1,fp8,fp8,0,0.8261120319366455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,128,1,float16,float16,0,0.7507946491241455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,0,1,float16,float16,0,0.8842826684316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,128,1,float16,fp8,0,0.7473866939544678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,128,1,fp8,fp8,0,0.7225866317749023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,0,1,float16,fp8,0,0.8815306822458903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,0,1,fp8,fp8,0,0.8437279860178629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,128,1,float16,float16,0,0.7515093485514323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,0,1,float16,float16,0,0.885807991027832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,128,1,float16,fp8,0,0.7507999738057455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,128,1,fp8,fp8,0,0.7156586647033691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,128,1,float16,float16,0,0.7575413386027018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,0,1,float16,fp8,0,0.8836959997812907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,0,1,fp8,fp8,0,0.8380213578542074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,128,1,float16,fp8,0,0.7546719710032145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,128,1,fp8,fp8,0,0.7765546639760336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,0,1,float16,float16,0,0.8935413360595703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,128,1,float16,float16,0,0.4089013338088989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,0,1,float16,fp8,0,0.8907306989034017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,0,1,fp8,fp8,0,0.8965813318888346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,0,1,float16,float16,0,0.48368000984191895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,128,1,float16,fp8,0,0.4010453224182129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,128,1,fp8,fp8,0,0.4160319964090983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,0,1,float16,fp8,0,0.47679467995961505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,128,1,float16,float16,0,0.38224534193674725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,0,1,fp8,fp8,0,0.4819786548614502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,0,1,float16,float16,0,0.45032533009847003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,128,1,float16,fp8,0,0.38213332494099933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,128,1,fp8,fp8,0,0.3606346845626831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,0,1,float16,fp8,0,0.45163198312123615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,128,1,float16,float16,0,0.3839999834696452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,0,1,fp8,fp8,0,0.42371733983357746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,0,1,float16,float16,0,0.4500960111618042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,128,1,float16,fp8,0,0.3829333384831746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,128,1,fp8,fp8,0,0.3689546585083008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,0,1,float16,fp8,0,0.45003732045491535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,0,1,fp8,fp8,0,0.43164801597595215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,128,1,float16,float16,0,0.38524266084035236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,0,1,float16,float16,0,0.4543466567993164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,128,1,float16,fp8,0,0.3827626705169678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,128,1,fp8,fp8,0,0.3685866594314575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,0,1,float16,fp8,0,0.4549066623051961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,128,1,float16,float16,0,0.38818665345509845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,0,1,fp8,fp8,0,0.42896533012390137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,0,1,float16,float16,0,0.45979734261830646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,128,1,float16,fp8,0,0.3869866530100505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,128,1,fp8,fp8,0,0.3799733320871989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,0,1,float16,fp8,0,0.4575413465499878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,128,1,float16,float16,0,0.21741332610448202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,0,1,fp8,fp8,0,0.44203734397888184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,0,1,float16,float16,0,0.25757867097854614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,128,1,float16,fp8,0,0.21390400330225626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,128,1,fp8,fp8,0,0.21889066696166992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,0,1,float16,fp8,0,0.2523253361384074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,0,1,fp8,fp8,0,0.25360000133514404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,128,1,float16,float16,0,0.20098666350046793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,0,1,float16,float16,0,0.2378186583518982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,128,1,float16,fp8,0,0.19970667362213135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,128,1,fp8,fp8,0,0.1911626656850179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,0,1,float16,fp8,0,0.23778132597605386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,0,1,fp8,fp8,0,0.2253226637840271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,128,1,float16,float16,0,0.20026665925979614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,0,1,float16,float16,0,0.23688532908757529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,128,1,fp8,fp8,0,0.19481066862742105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,128,1,float16,fp8,0,0.2016106645266215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,0,1,float16,fp8,0,0.23657600084940592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,0,1,fp8,fp8,0,0.22756266593933105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,128,1,float16,float16,0,0.2025866707166036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,128,1,float16,fp8,0,0.20009599129358926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,0,1,float16,float16,0,0.23798932631810507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,128,1,fp8,fp8,0,0.19404266277949014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,0,1,float16,fp8,0,0.23844265937805176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,128,1,float16,float16,0,0.20410666863123575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,0,1,fp8,fp8,0,0.22801599899927774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,0,1,float16,float16,0,0.2400266726811727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,128,1,float16,fp8,0,0.2047040065129598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,128,1,fp8,fp8,0,0.19799466927846274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,0,1,float16,fp8,0,0.23938133319218954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,128,1,float16,float16,0,0.11975466211636861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,0,1,fp8,fp8,0,0.22835199038187662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,0,1,float16,float16,0,0.141184002161026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,128,1,float16,fp8,0,0.11629333098729451
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,128,1,fp8,fp8,0,0.12130666772524516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,0,1,float16,fp8,0,0.13845333456993103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,128,1,float16,float16,0,0.10673066973686218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,0,1,fp8,fp8,0,0.13823466499646506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,0,1,float16,float16,0,0.12847466270128885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,128,1,float16,fp8,0,0.10545066992441814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,128,1,fp8,fp8,0,0.10130133231480916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,0,1,float16,fp8,0,0.1281066636244456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,0,1,fp8,fp8,0,0.11998933553695679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,128,1,float16,float16,0,0.1060640017191569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,0,1,float16,float16,0,0.12784000237782797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,128,1,float16,fp8,0,0.10677867134412129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,128,1,fp8,fp8,0,0.1014453371365865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,0,1,float16,fp8,0,0.12807466586430868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,0,1,fp8,fp8,0,0.12157866358757019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,128,1,float16,float16,0,0.10707733035087585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,128,1,float16,fp8,0,0.10754133264223735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,0,1,float16,float16,0,0.12796266873677573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,128,1,fp8,fp8,0,0.10351999600728352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,0,1,fp8,fp8,0,0.12112533052762349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,0,1,float16,fp8,0,0.12799466649691263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,128,1,float16,float16,0,0.10828800002733867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,128,1,float16,fp8,0,0.10639466842015584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,0,1,float16,float16,0,0.13100799918174744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,128,1,fp8,fp8,0,0.10593600074450175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,128,1,float16,float16,0,0.0685280015071233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,0,1,float16,fp8,0,0.1281599998474121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,0,1,fp8,fp8,0,0.12379733721415202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,0,1,float16,float16,0,0.0809333324432373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,128,1,float16,fp8,0,0.06658666829268138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,128,1,fp8,fp8,0,0.07062399884064992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,0,1,float16,fp8,0,0.07941866914431255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,128,1,float16,float16,0,0.06452266871929169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,0,1,fp8,fp8,0,0.08235733211040497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,128,1,float16,fp8,0,0.06411733229955037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,0,1,float16,float16,0,0.07700799902280171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,128,1,fp8,fp8,0,0.062128002444903054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,0,1,fp8,fp8,0,0.07260799904664357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,0,1,float16,fp8,0,0.0759680022795995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,128,1,float16,float16,0,0.06458666423956554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,128,1,float16,fp8,0,0.06342400113741557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,128,1,fp8,fp8,0,0.06247999767462412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,0,1,float16,float16,0,0.0768746683994929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,0,1,float16,fp8,0,0.07630399862925212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,0,1,fp8,fp8,0,0.07276799778143565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,128,1,float16,float16,0,0.06428266565004985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,128,1,float16,fp8,0,0.0642133355140686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,128,1,fp8,fp8,0,0.06202666461467743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,0,1,float16,float16,0,0.07647466659545898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,0,1,float16,fp8,0,0.07660266757011414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,128,1,float16,float16,0,0.06411199768384297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,0,1,fp8,fp8,0,0.07239999870459239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,0,1,float16,float16,0,0.07621866464614868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,128,1,float16,fp8,0,0.064410666624705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,128,1,fp8,fp8,0,0.06311999758084615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,0,1,float16,fp8,0,0.07660266757011414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,128,1,float16,float16,0,0.04197866717974345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,0,1,fp8,fp8,0,0.07365866502126057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,0,1,float16,float16,0,0.050154666105906166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,128,1,float16,fp8,0,0.041797334949175514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,128,1,fp8,fp8,0,0.0394400010506312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,0,1,float16,fp8,0,0.0499839981396993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,0,1,fp8,fp8,0,0.05020800232887268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,128,1,float16,float16,0,0.039962666730086006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,0,1,float16,float16,0,0.047824000318845115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,128,1,float16,fp8,0,0.039642666776975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,128,1,fp8,fp8,0,0.03839999934037527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,0,1,float16,fp8,0,0.05023466547330221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,0,1,fp8,fp8,0,0.04763199885686239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,128,1,float16,float16,0,0.0396373321612676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,0,1,float16,float16,0,0.04980800052483877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,128,1,float16,fp8,0,0.03937600056330363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,128,1,fp8,fp8,0,0.03793066740036011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,0,1,float16,fp8,0,0.04991999765237173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,0,1,fp8,fp8,0,0.04637333254019419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,128,1,float16,float16,0,0.03957866628964742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,0,1,float16,float16,0,0.04980266590913137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,128,1,float16,fp8,0,0.03933866570393244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,128,1,fp8,fp8,0,0.03984000037113825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,0,1,float16,fp8,0,0.048309331138928734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,0,1,fp8,fp8,0,0.04663999875386556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,128,1,float16,float16,0,0.04043733328580856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,0,1,float16,float16,0,0.048341333866119385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,128,1,float16,fp8,0,0.03979733337958654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,128,1,fp8,fp8,0,0.03967999915281931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,0,1,float16,fp8,0,0.04800533254941305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,0,1,fp8,fp8,0,0.047968000173568726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,128,1,float16,float16,0,2.8423945109049478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,128,1,float16,fp8,0,2.819888114929199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,0,1,float16,float16,0,2.871631940205892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,128,1,fp8,fp8,0,2.776240030924479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,0,1,float16,fp8,0,2.862682660420736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,0,1,fp8,fp8,0,2.813392003377279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,128,1,float16,float16,0,2.8607571919759116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,0,1,float16,float16,0,2.894378662109375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,128,1,float16,fp8,0,2.8515520095825195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,128,1,fp8,fp8,0,2.8620853424072266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,0,1,float16,fp8,0,2.8877334594726562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,0,1,fp8,fp8,0,2.891397476196289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,128,1,float16,float16,0,2.9320958455403647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,0,1,float16,float16,0,2.9975786209106445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,128,1,float16,fp8,0,2.934453328450521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,128,1,fp8,fp8,0,2.8953653971354165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,0,1,float16,fp8,0,2.9370028177897134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,0,1,fp8,fp8,0,2.8991626103719077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,128,1,float16,float16,0,3.019184112548828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,0,1,float16,float16,0,3.056858698527018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,128,1,float16,fp8,0,2.974917411804199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,128,1,fp8,fp8,0,3.0581067403157554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,0,1,float16,fp8,0,3.034346580505371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,0,1,fp8,fp8,0,3.0895039240519204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,128,1,float16,float16,0,1.566912015279134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,0,1,float16,float16,0,1.593488057454427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,128,1,float16,fp8,0,1.5676053365071614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,128,1,fp8,fp8,0,1.5804959932963054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,0,1,float16,fp8,0,1.5838027000427246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,0,1,fp8,fp8,0,1.6022826830546062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,128,1,float16,float16,0,1.4306613604227703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,0,1,float16,float16,0,1.4501387278238933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,128,1,float16,fp8,0,1.4209334055582683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,128,1,fp8,fp8,0,1.3806986808776855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,0,1,float16,fp8,0,1.442922592163086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,0,1,fp8,fp8,0,1.3927146593729656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,128,1,float16,float16,0,1.4417440096537273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,0,1,float16,float16,0,1.4595786730448406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,128,1,float16,fp8,0,1.4352854092915852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,128,1,fp8,fp8,0,1.4082719484965007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,0,1,float16,fp8,0,1.4531572659810383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,0,1,fp8,fp8,0,1.418874740600586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,128,1,float16,float16,0,1.445141315460205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,0,1,float16,float16,0,1.4658719698588054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,128,1,float16,fp8,0,1.4414933522542317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,128,1,fp8,fp8,0,1.404607931772868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,0,1,float16,fp8,0,1.4638080596923828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,0,1,fp8,fp8,0,1.4351733525594075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,128,1,float16,float16,0,1.4568692843119304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,0,1,float16,float16,0,1.5064853032430012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,128,1,float16,fp8,0,1.454437255859375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,128,1,fp8,fp8,0,1.531551996866862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,0,1,float16,fp8,0,1.4711146354675293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,128,1,float16,float16,0,0.7744800249735514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,0,1,fp8,fp8,0,1.5474826494852703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,0,1,float16,float16,0,0.7905759811401367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,128,1,float16,fp8,0,0.7646079858144125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,128,1,fp8,fp8,0,0.7915786902109782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,0,1,float16,fp8,0,0.7855679988861084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,0,1,fp8,fp8,0,0.7995786666870117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,128,1,float16,float16,0,0.7246080239613851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,0,1,float16,float16,0,0.734058698018392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,128,1,float16,fp8,0,0.7213173707326254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,128,1,fp8,fp8,0,0.68394668896993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,0,1,float16,fp8,0,0.7324213186899821
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,0,1,fp8,fp8,0,0.6920426686604818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,128,1,float16,float16,0,0.7305386861165365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,0,1,float16,float16,0,0.7398719787597656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,128,1,float16,fp8,0,0.7283306916554769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,128,1,fp8,fp8,0,0.7003093560536703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,0,1,float16,fp8,0,0.7393333117167155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,0,1,fp8,fp8,0,0.7058239777882894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,128,1,float16,float16,0,0.7316053708394369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,0,1,float16,float16,0,0.7414240042368571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,128,1,float16,fp8,0,0.7295893033345541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,128,1,fp8,fp8,0,0.6966026624043783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,0,1,float16,fp8,0,0.7400586605072021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,0,1,fp8,fp8,0,0.7020479838053385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,128,1,float16,float16,0,0.7360479831695557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,0,1,float16,float16,0,0.7498719692230225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,128,1,float16,fp8,0,0.7363626956939697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,128,1,fp8,fp8,0,0.7579253514607748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,0,1,float16,fp8,0,0.7465066909790039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,0,1,fp8,fp8,0,0.7660693327585856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,128,1,float16,float16,0,0.39742934703826904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,0,1,float16,float16,0,0.40592531363169354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,128,1,float16,fp8,0,0.3917866547902425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,128,1,fp8,fp8,0,0.4044373432795207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,0,1,float16,fp8,0,0.399834672609965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,128,1,float16,float16,0,0.3709919850031535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,0,1,fp8,fp8,0,0.4094613393147786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,0,1,float16,float16,0,0.3770986795425415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,128,1,float16,fp8,0,0.37195201714833576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,128,1,fp8,fp8,0,0.3505226771036784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,0,1,fp8,fp8,0,0.35627734661102295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,0,1,float16,fp8,0,0.3758080005645752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,128,1,float16,float16,0,0.37281068166097003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,0,1,float16,float16,0,0.3781546751658122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,128,1,float16,fp8,0,0.3710399866104126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,128,1,fp8,fp8,0,0.3581013282140096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,0,1,float16,fp8,0,0.37571199735005695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,0,1,fp8,fp8,0,0.36114664872487384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,128,1,float16,float16,0,0.37402665615081787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,0,1,float16,float16,0,0.3772053321202596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,128,1,float16,fp8,0,0.3737013339996338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,128,1,fp8,fp8,0,0.3569920063018799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,0,1,fp8,fp8,0,0.35894934336344403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,0,1,float16,fp8,0,0.37884799639383954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,128,1,float16,float16,0,0.3784746726353963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,0,1,float16,float16,0,0.38502931594848633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,128,1,fp8,fp8,0,0.368069330851237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,128,1,float16,fp8,0,0.39372265338897705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,0,1,float16,fp8,0,0.3837973276774089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,128,1,float16,float16,0,0.2115359902381897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,0,1,float16,float16,0,0.21634133656819662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,0,1,fp8,fp8,0,0.3720906575520833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,128,1,float16,fp8,0,0.208624005317688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,128,1,fp8,fp8,0,0.21222933133443198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,0,1,float16,fp8,0,0.21255467335383096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,0,1,fp8,fp8,0,0.21528534094492593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,128,1,float16,float16,0,0.1968266765276591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,128,1,float16,fp8,0,0.1963520050048828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,0,1,float16,float16,0,0.19637866814931235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,128,1,fp8,fp8,0,0.18454400698343912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,0,1,float16,fp8,0,0.19861332575480142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,0,1,fp8,fp8,0,0.1881813406944275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,128,1,float16,float16,0,0.19416000445683798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,0,1,float16,float16,0,0.19708800315856934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,128,1,float16,fp8,0,0.19528534015019736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,128,1,fp8,fp8,0,0.1869973341623942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,0,1,float16,fp8,0,0.19741332530975342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,0,1,fp8,fp8,0,0.18932799498240152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,128,1,float16,float16,0,0.19564266999562582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,0,1,float16,float16,0,0.19792000452677408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,128,1,float16,fp8,0,0.19758933782577515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,128,1,fp8,fp8,0,0.18832000096638998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,0,1,float16,fp8,0,0.19791465997695923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,0,1,fp8,fp8,0,0.19024000565210977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,128,1,float16,float16,0,0.19775466124216715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,0,1,float16,float16,0,0.20019733905792236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,128,1,float16,fp8,0,0.19783467054367065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,128,1,fp8,fp8,0,0.19045867522557577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,0,1,float16,fp8,0,0.19855467478434244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,0,1,fp8,fp8,0,0.1914986570676168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,128,1,float16,float16,0,0.11405332883199056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,0,1,float16,float16,0,0.11729066570599873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,128,1,float16,fp8,0,0.11357866724332173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,128,1,fp8,fp8,0,0.11645866433779399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,0,1,float16,fp8,0,0.11658666531244914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,0,1,fp8,fp8,0,0.11783466736475627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,128,1,float16,float16,0,0.10322133700052898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,0,1,float16,float16,0,0.10723732908566792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,128,1,float16,fp8,0,0.10549333691596985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,128,1,fp8,fp8,0,0.09887466828028361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,0,1,float16,fp8,0,0.10528000195821126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,128,1,float16,float16,0,0.10507200161616008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,0,1,fp8,fp8,0,0.10108799735705058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,0,1,float16,float16,0,0.10546666383743286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,128,1,float16,fp8,0,0.10407466689745586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,128,1,fp8,fp8,0,0.1011199951171875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,0,1,float16,fp8,0,0.10541333754857381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,0,1,fp8,fp8,0,0.10062932968139648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,128,1,float16,float16,0,0.10557333628336589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,0,1,float16,float16,0,0.10568533341089885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,128,1,float16,fp8,0,0.10428800185521443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,128,1,fp8,fp8,0,0.10065600275993347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,0,1,float16,fp8,0,0.1074666678905487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,0,1,fp8,fp8,0,0.10215999682744344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,128,1,float16,float16,0,0.10528000195821126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,0,1,float16,float16,0,0.10872000455856323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,128,1,float16,fp8,0,0.10579733053843181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,128,1,fp8,fp8,0,0.1029813289642334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,0,1,float16,fp8,0,0.1074720025062561
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,128,1,float16,float16,0,0.06554666658242543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,0,1,fp8,fp8,0,0.10478400190671285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,0,1,float16,float16,0,0.06630399823188782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,128,1,float16,fp8,0,0.06656000018119812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,128,1,fp8,fp8,0,0.0690719981988271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,0,1,float16,fp8,0,0.0655626654624939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,0,1,fp8,fp8,0,0.06857599814732869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,128,1,float16,float16,0,0.06225066880385081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,0,1,float16,float16,0,0.062208001812299095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,128,1,float16,fp8,0,0.06195733447869619
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,128,1,fp8,fp8,0,0.06002666552861532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,0,1,float16,fp8,0,0.06225599845250448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,0,1,fp8,fp8,0,0.060234665870666504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,128,1,float16,float16,0,0.06237333516279856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,0,1,float16,float16,0,0.06244266529877981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,128,1,float16,fp8,0,0.06321600079536438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,128,1,fp8,fp8,0,0.06015466650327047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,0,1,float16,fp8,0,0.06363733112812042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,0,1,fp8,fp8,0,0.061759998401006065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,128,1,float16,float16,0,0.062463998794555664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,0,1,float16,float16,0,0.06333333253860474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,128,1,float16,fp8,0,0.06302399933338165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,0,1,float16,fp8,0,0.06235733131567637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,128,1,fp8,fp8,0,0.06128533184528351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,0,1,fp8,fp8,0,0.062165334820747375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,128,1,float16,float16,0,0.062463998794555664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,0,1,float16,float16,0,0.0631466656923294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,128,1,float16,fp8,0,0.06222933530807495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,128,1,fp8,fp8,0,0.06178666651248932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,0,1,float16,fp8,0,0.0626986672480901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,0,1,fp8,fp8,0,0.061648001273473106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,128,1,float16,float16,0,0.041696002086003624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,0,1,float16,float16,0,0.04161066561937332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,128,1,float16,fp8,0,0.039887999494870506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,128,1,fp8,fp8,0,0.041722665230433144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,0,1,float16,fp8,0,0.04171733558177948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,0,1,fp8,fp8,0,0.041509332756201424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,0,1,float16,float16,0,0.04151466737190882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,128,1,float16,float16,0,0.03977599988381068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,128,1,float16,fp8,0,0.039818666875362396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,0,1,float16,fp8,0,0.039594667653242745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,128,1,fp8,fp8,0,0.03945599993069967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,0,1,fp8,fp8,0,0.03998400022586187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,128,1,float16,float16,0,0.03961066653331121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,0,1,float16,float16,0,0.039893334110577904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,128,1,float16,fp8,0,0.03948266555865606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,0,1,float16,fp8,0,0.04153066625197729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,128,1,fp8,fp8,0,0.037615999579429626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,0,1,fp8,fp8,0,0.03977066775163015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,128,1,float16,float16,0,0.039450667798519135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,0,1,float16,float16,0,0.04164266586303711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,128,1,float16,fp8,0,0.03948266555865606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,128,1,fp8,fp8,0,0.039647998909155525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,0,1,float16,fp8,0,0.03984000037113825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,0,1,fp8,fp8,0,0.03965866565704346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,128,1,float16,float16,0,0.03961600114901861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,0,1,float16,float16,0,0.03968533376852671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,128,1,float16,fp8,0,0.03955200066169103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,128,1,fp8,fp8,0,0.03834133346875509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,0,1,float16,fp8,0,0.04163199911514918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,128,1,float16,float16,0,0.027210667729377747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,0,1,float16,float16,0,0.02734400083621343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,128,1,float16,fp8,0,0.02784000088771184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,128,1,fp8,fp8,0,0.027493332823117573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,0,1,float16,fp8,0,0.027786667148272198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,0,1,fp8,fp8,0,0.027376001079877216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,128,1,float16,float16,0,0.025610665480295818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,0,1,float16,float16,0,0.02731200059254964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,128,1,fp8,fp8,0,0.02515733242034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,0,1,float16,fp8,0,0.026682667434215546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,0,1,fp8,fp8,0,0.02743999908367793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,128,1,float16,float16,0,0.025279998779296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,0,1,float16,float16,0,0.025642665723959606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,128,1,float16,fp8,0,0.02714666724205017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,128,1,fp8,fp8,0,0.025663999219735462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,0,1,float16,fp8,0,0.02731200059254964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,0,1,fp8,fp8,0,0.02749866743882497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,128,1,float16,float16,0,0.027248000105222065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,0,1,float16,float16,0,0.025461333493391674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,128,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,128,1,fp8,fp8,0,0.02719466636578242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,0,1,float16,fp8,0,0.027349332968393963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,0,1,fp8,fp8,0,0.027221334477265675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,128,1,float16,float16,0,0.025258667767047882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,0,1,float16,float16,0,0.02758399893840154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,128,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,128,1,fp8,fp8,0,0.02555199960867564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,0,1,fp8,fp8,0,0.026698666314284008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,128,1,float16,float16,0,1.3343520164489746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,0,1,float16,float16,0,1.3008853594462078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,128,1,float16,fp8,0,1.329301357269287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,128,1,fp8,fp8,0,1.2735040187835693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,0,1,float16,fp8,0,1.298847993214925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,0,1,fp8,fp8,0,1.2452267011006672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,128,1,float16,float16,0,1.33022936185201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,0,1,float16,float16,0,1.301477352778117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,128,1,float16,fp8,0,1.3262240091959636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,128,1,fp8,fp8,0,1.2990399996439617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,0,1,fp8,fp8,0,1.2603253523508708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,128,1,float16,float16,0,1.3392747243245442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,0,1,float16,fp8,0,1.294874668121338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,0,1,float16,float16,0,1.310261329015096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,128,1,float16,fp8,0,1.3320639928181965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,128,1,fp8,fp8,0,1.320197343826294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,0,1,float16,fp8,0,1.3060266971588135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,0,1,fp8,fp8,0,1.279530684153239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,128,1,float16,float16,0,1.3701066970825195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,0,1,float16,float16,0,1.3269226551055908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,128,1,float16,fp8,0,1.3515466054280598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,128,1,float16,float16,0,0.7259946664174398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,128,1,fp8,fp8,0,1.4073972702026367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,0,1,float16,fp8,0,1.3233173688252766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,0,1,fp8,fp8,0,1.3774293263753254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,0,1,float16,float16,0,0.7124640146891276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,128,1,float16,fp8,0,0.7168533007303873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,128,1,fp8,fp8,0,0.7376853624979655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,0,1,float16,fp8,0,0.7031733194986979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,128,1,float16,float16,0,0.6739466985066732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,0,1,fp8,fp8,0,0.7220906416575114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,0,1,float16,float16,0,0.6560746828715006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,128,1,float16,fp8,0,0.6758453051249186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,128,1,fp8,fp8,0,0.6304746468861898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,0,1,float16,fp8,0,0.6551520029703776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,0,1,fp8,fp8,0,0.617194652557373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,128,1,float16,float16,0,0.6707839965820312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,0,1,float16,float16,0,0.6577066580454508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,128,1,float16,fp8,0,0.6694080034891764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,128,1,fp8,fp8,0,0.6488159894943237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,0,1,float16,fp8,0,0.6555200020472208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,0,1,fp8,fp8,0,0.6307253440221151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,128,1,float16,float16,0,0.6760373115539551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,0,1,float16,float16,0,0.6616693337758383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,128,1,float16,fp8,0,0.6747360229492188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,128,1,fp8,fp8,0,0.6439093351364136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,0,1,fp8,fp8,0,0.6304906606674194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,0,1,float16,fp8,0,0.660378654797872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,128,1,float16,float16,0,0.6849706967671713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,0,1,float16,float16,0,0.6727573076883951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,128,1,float16,fp8,0,0.6771199703216553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,128,1,fp8,fp8,0,0.7010453542073568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,0,1,float16,fp8,0,0.6661973396937052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,128,1,float16,float16,0,0.3703146775563558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,0,1,float16,float16,0,0.3652106523513794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,0,1,fp8,fp8,0,0.6802133719126383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,128,1,float16,fp8,0,0.3669866720835368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,0,1,float16,fp8,0,0.35838401317596436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,128,1,fp8,fp8,0,0.37997865676879883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,128,1,float16,float16,0,0.3460586468378703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,0,1,fp8,fp8,0,0.37196266651153564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,0,1,float16,float16,0,0.335749348004659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,128,1,float16,fp8,0,0.342410683631897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,128,1,fp8,fp8,0,0.32470933596293133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,0,1,float16,fp8,0,0.3371093273162842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,0,1,fp8,fp8,0,0.31568533182144165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,128,1,float16,float16,0,0.3428586721420288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,0,1,float16,float16,0,0.33558400472005206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,128,1,float16,fp8,0,0.34098132451375324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,128,1,fp8,fp8,0,0.3321066697438558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,0,1,float16,fp8,0,0.3342026472091675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,0,1,fp8,fp8,0,0.32412266731262207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,128,1,float16,float16,0,0.3457119862238566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,0,1,float16,float16,0,0.33902398745218915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,128,1,float16,fp8,0,0.343509316444397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,128,1,fp8,fp8,0,0.3324693242708842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,0,1,float16,fp8,0,0.33956801891326904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,0,1,fp8,fp8,0,0.3240106701850891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,128,1,float16,float16,0,0.350874662399292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,0,1,float16,float16,0,0.3447253306706746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,128,1,float16,fp8,0,0.34778134028116864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,128,1,fp8,fp8,0,0.3408586581548055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,0,1,float16,fp8,0,0.34226667881011963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,128,1,float16,float16,0,0.19793067375818887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,0,1,fp8,fp8,0,0.3347359895706177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,0,1,float16,float16,0,0.1939199964205424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,128,1,float16,fp8,0,0.19374932845433554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,128,1,fp8,fp8,0,0.20001065731048584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,0,1,float16,fp8,0,0.19115734100341797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,0,1,fp8,fp8,0,0.1978293259938558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,128,1,float16,float16,0,0.18156800667444864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,0,1,float16,float16,0,0.1788426637649536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,128,1,float16,fp8,0,0.18345065911610922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,128,1,fp8,fp8,0,0.1726293365160624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,0,1,float16,fp8,0,0.17877866824467978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,0,1,fp8,fp8,0,0.16943466663360596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,128,1,float16,float16,0,0.18104533354441324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,0,1,float16,float16,0,0.17779199282328287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,128,1,float16,fp8,0,0.18125333388646445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,128,1,fp8,fp8,0,0.1748853325843811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,0,1,float16,fp8,0,0.17720532417297363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,0,1,fp8,fp8,0,0.1697333256403605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,128,1,float16,float16,0,0.18102399508158365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,0,1,float16,float16,0,0.17818133036295572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,128,1,float16,fp8,0,0.1819093426068624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,128,1,fp8,fp8,0,0.1750346620877584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,0,1,float16,fp8,0,0.17829332749048868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,0,1,fp8,fp8,0,0.17034133275349936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,128,1,float16,float16,0,0.18459200859069824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,0,1,float16,float16,0,0.18036800622940063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,128,1,float16,fp8,0,0.18389334281285605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,128,1,fp8,fp8,0,0.17858133713404337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,0,1,float16,fp8,0,0.18047465880711874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,0,1,fp8,fp8,0,0.17501866817474365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,128,1,float16,float16,0,0.10583466291427612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,128,1,float16,fp8,0,0.10520000259081523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,0,1,float16,float16,0,0.10455466310183208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,128,1,fp8,fp8,0,0.11018666625022888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,0,1,float16,fp8,0,0.10147733489672343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,0,1,fp8,fp8,0,0.10762133200963338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,0,1,float16,float16,0,0.09521599610646565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,128,1,float16,float16,0,0.09650133053461711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,128,1,float16,fp8,0,0.09563199679056804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,128,1,fp8,fp8,0,0.09264000256856282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,0,1,float16,fp8,0,0.09516266981760661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,128,1,float16,float16,0,0.0953439970811208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,0,1,fp8,fp8,0,0.0906880001227061
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,0,1,float16,float16,0,0.09333333373069763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,128,1,float16,fp8,0,0.09612799684206645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,128,1,fp8,fp8,0,0.09275733431180318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,0,1,fp8,fp8,0,0.08893866340319316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,0,1,float16,fp8,0,0.09537067015965779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,128,1,float16,float16,0,0.09719467163085938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,0,1,float16,float16,0,0.09521599610646565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,128,1,float16,fp8,0,0.09715200463930766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,128,1,fp8,fp8,0,0.09551466504732768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,0,1,float16,fp8,0,0.09520533680915833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,0,1,fp8,fp8,0,0.09161600470542908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,128,1,float16,float16,0,0.09789866209030151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,128,1,float16,fp8,0,0.09929600358009338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,0,1,float16,float16,0,0.0965280036131541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,128,1,fp8,fp8,0,0.09533333778381348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,0,1,float16,fp8,0,0.09516800443331401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,0,1,fp8,fp8,0,0.09288533528645833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,128,1,float16,float16,0,0.06257066627343495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,0,1,float16,float16,0,0.060271998246510826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,128,1,float16,fp8,0,0.061247999469439186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,128,1,fp8,fp8,0,0.06592000027497609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,0,1,float16,fp8,0,0.06031466523806254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,0,1,fp8,fp8,0,0.063509335120519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,128,1,float16,float16,0,0.05819733440876007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,0,1,float16,float16,0,0.0576800008614858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,128,1,fp8,fp8,0,0.05612266560395559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,128,1,float16,fp8,0,0.059952000776926674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,0,1,float16,fp8,0,0.056927998860677086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,0,1,fp8,fp8,0,0.054901331663131714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,128,1,float16,float16,0,0.05816000203291575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,0,1,float16,float16,0,0.05856533348560333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,128,1,float16,fp8,0,0.05978133281071981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,128,1,fp8,fp8,0,0.05806399881839752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,0,1,float16,fp8,0,0.058261334896087646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,128,1,float16,float16,0,0.059445331494013466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,0,1,fp8,fp8,0,0.05506666501363119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,0,1,float16,float16,0,0.05806933343410492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,128,1,float16,fp8,0,0.05966933568318685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,128,1,fp8,fp8,0,0.056618665655454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,0,1,float16,fp8,0,0.05801600217819214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,0,1,fp8,fp8,0,0.054917335510253906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,128,1,float16,float16,0,0.058261334896087646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,0,1,float16,float16,0,0.05831466615200043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,128,1,float16,fp8,0,0.058058664202690125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,128,1,fp8,fp8,0,0.05759466687838236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,0,1,float16,fp8,0,0.0573226660490036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,0,1,fp8,fp8,0,0.05677333474159241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,128,1,float16,float16,0,0.03823466598987579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,0,1,float16,float16,0,0.03661333272854487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,128,1,float16,fp8,0,0.03794133414824804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,128,1,fp8,fp8,0,0.03852266569932302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,0,1,float16,fp8,0,0.03738666574160258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,0,1,fp8,fp8,0,0.03782399992148081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,128,1,float16,float16,0,0.037690666814645134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,0,1,float16,float16,0,0.03584533433119456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,128,1,float16,fp8,0,0.037445334096749626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,128,1,fp8,fp8,0,0.037045332292715706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,0,1,float16,fp8,0,0.03688533355792364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,0,1,fp8,fp8,0,0.03512533257404963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,128,1,float16,float16,0,0.03601066768169403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,0,1,float16,float16,0,0.036042665441830955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,128,1,float16,fp8,0,0.03579200059175491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,128,1,fp8,fp8,0,0.035760000348091125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,0,1,float16,fp8,0,0.03576533248027166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,0,1,fp8,fp8,0,0.035002666215101876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,128,1,float16,float16,0,0.03615466753641764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,0,1,float16,float16,0,0.035402665535608925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,128,1,float16,fp8,0,0.035829332967599235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,128,1,fp8,fp8,0,0.03738133360942205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,0,1,float16,fp8,0,0.03545066714286804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,0,1,fp8,fp8,0,0.03585600107908249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,128,1,float16,float16,0,0.037717332442601524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,0,1,float16,float16,0,0.03554133325815201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,128,1,float16,fp8,0,0.03751999884843826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,128,1,fp8,fp8,0,0.03762666632731756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,0,1,float16,fp8,0,0.036490666369597115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,0,1,fp8,fp8,0,0.035802667339642845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,128,1,float16,float16,0,0.025424001117547352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,0,1,float16,float16,0,0.02550933261712392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,128,1,float16,fp8,0,0.025487999121348064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,128,1,fp8,fp8,0,0.025722667574882507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,0,1,float16,fp8,0,0.025077333052953083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,0,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,128,1,float16,float16,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,0,1,float16,float16,0,0.025205334027608235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,128,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,128,1,fp8,fp8,0,0.02418133368094762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,0,1,float16,fp8,0,0.02516266703605652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,0,1,fp8,fp8,0,0.02442666639884313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,128,1,float16,float16,0,0.023925334215164185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,0,1,float16,float16,0,0.025578667720158894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,128,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,128,1,fp8,fp8,0,0.02521066615978877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,0,1,float16,fp8,0,0.025402667621771496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,0,1,fp8,fp8,0,0.023200000325838726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,128,1,float16,float16,0,0.02548266698916753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,0,1,float16,float16,0,0.02346666653951009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,128,1,float16,fp8,0,0.023386667172114056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,128,1,fp8,fp8,0,0.025616000096003216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,0,1,float16,fp8,0,0.023706667125225067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,0,1,fp8,fp8,0,0.023226665953795116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,128,1,float16,float16,0,0.026021334032217663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,0,1,float16,float16,0,0.023242667317390442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,128,1,float16,fp8,0,0.023749334116776783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,128,1,fp8,fp8,0,0.025050667424996693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,0,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,0,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,128,1,float16,float16,0,0.02109333376089732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,0,1,float16,float16,0,0.021562665700912476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,128,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,128,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,0,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,0,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,128,1,float16,float16,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,0,1,float16,float16,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,128,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,128,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,128,1,float16,float16,0,0.021242665747801464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,128,1,float16,fp8,0,0.021359999974568684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,128,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,0,1,float16,fp8,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,0,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,128,1,float16,float16,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,0,1,float16,float16,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,128,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,128,1,fp8,fp8,0,0.021525333325068157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,0,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,128,1,float16,float16,0,0.02144533395767212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,128,1,float16,fp8,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,0,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,128,1,float16,float16,0,0.7058826287587484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,0,1,float16,float16,0,0.7062026659647623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,128,1,float16,fp8,0,0.7051786581675211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,128,1,fp8,fp8,0,0.6720213095347086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,0,1,float16,fp8,0,0.7019999821980795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,128,1,float16,float16,0,0.7065653006235758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,0,1,fp8,fp8,0,0.6731839974721273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,0,1,float16,float16,0,0.7035840352376302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,128,1,float16,fp8,0,0.7085386912027994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,0,1,float16,fp8,0,0.702842632929484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,128,1,fp8,fp8,0,0.6816319624582926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,0,1,fp8,fp8,0,0.682213306427002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,128,1,float16,float16,0,0.7108480135599772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,0,1,float16,float16,0,0.7141546408335367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,128,1,float16,fp8,0,0.707103967666626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,128,1,fp8,fp8,0,0.6712799866994222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,0,1,float16,fp8,0,0.7062986691792806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,0,1,fp8,fp8,0,0.679365317026774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,128,1,float16,float16,0,0.7160960038503011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,0,1,float16,float16,0,0.7158613204956055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,128,1,fp8,fp8,0,0.7307519912719727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,0,1,float16,fp8,0,0.7122346560160319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,128,1,float16,fp8,0,0.7106826305389404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,0,1,fp8,fp8,0,0.7273706595102946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,128,1,float16,float16,0,0.3884906768798828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,0,1,float16,float16,0,0.38916265964508057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,128,1,float16,fp8,0,0.3829546769460042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,128,1,fp8,fp8,0,0.3933226664861043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,0,1,float16,fp8,0,0.38131733735402423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,0,1,fp8,fp8,0,0.3917920192082723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,128,1,float16,float16,0,0.3624853293100993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,0,1,float16,float16,0,0.36051734288533527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,128,1,float16,fp8,0,0.3592746655146281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,128,1,fp8,fp8,0,0.34169065952301025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,0,1,fp8,fp8,0,0.34202667077382404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,0,1,float16,fp8,0,0.3609973192214966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,128,1,float16,float16,0,0.36021331946055096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,0,1,float16,float16,0,0.35917866230010986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,128,1,fp8,fp8,0,0.3482613166173299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,128,1,float16,fp8,0,0.3591466744740804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,0,1,float16,fp8,0,0.35759464899698895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,0,1,fp8,fp8,0,0.3471200068791707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,128,1,float16,float16,0,0.36240533987681073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,0,1,float16,float16,0,0.3625760078430176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,128,1,float16,fp8,0,0.36005866527557373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,128,1,fp8,fp8,0,0.3457333246866862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,0,1,float16,fp8,0,0.36116798718770343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,0,1,fp8,fp8,0,0.34638933340708417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,0,1,float16,float16,0,0.36630932490030926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,128,1,float16,float16,0,0.3667573531468709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,128,1,float16,fp8,0,0.3638240098953247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,128,1,fp8,fp8,0,0.35469865798950195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,0,1,float16,fp8,0,0.36185065905253094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,0,1,fp8,fp8,0,0.35790932178497314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,128,1,float16,float16,0,0.20573333899180093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,0,1,float16,float16,0,0.20383467276891074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,128,1,float16,fp8,0,0.19950399796168009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,128,1,fp8,fp8,0,0.20653333266576132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,0,1,float16,fp8,0,0.19982399543126425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,0,1,fp8,fp8,0,0.20543466011683145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,128,1,float16,float16,0,0.18974934021631876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,0,1,float16,float16,0,0.1904159982999166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,128,1,float16,fp8,0,0.18954666455586752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,128,1,fp8,fp8,0,0.1793173352877299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,0,1,float16,fp8,0,0.1888693372408549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,0,1,fp8,fp8,0,0.17876267433166504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,128,1,float16,float16,0,0.1888693372408549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,0,1,float16,float16,0,0.1893493334452311
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,128,1,float16,fp8,0,0.18785067399342856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,128,1,fp8,fp8,0,0.18052800496419272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,0,1,float16,fp8,0,0.18777066469192505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,0,1,fp8,fp8,0,0.17989333470662436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,128,1,float16,float16,0,0.18979734182357788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,0,1,float16,float16,0,0.18961066007614136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,128,1,float16,fp8,0,0.19023466110229492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,128,1,fp8,fp8,0,0.17901867628097534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,0,1,float16,fp8,0,0.18869332472483316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,0,1,fp8,fp8,0,0.18026133378346762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,128,1,float16,float16,0,0.19271999597549438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,0,1,float16,float16,0,0.19131199518839517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,128,1,float16,fp8,0,0.18889067570368448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,128,1,fp8,fp8,0,0.18503467241923013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,0,1,float16,fp8,0,0.19208000103632608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,0,1,fp8,fp8,0,0.18389866749445596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,128,1,float16,float16,0,0.10950932900110881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,0,1,float16,float16,0,0.11190932989120483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,128,1,float16,fp8,0,0.10944533348083496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,128,1,fp8,fp8,0,0.11346133550008138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,0,1,float16,fp8,0,0.10834667086601257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,0,1,fp8,fp8,0,0.11375466982523601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,128,1,float16,float16,0,0.10110933581988017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,0,1,float16,float16,0,0.10231467088063557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,128,1,float16,fp8,0,0.10171199838320415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,128,1,fp8,fp8,0,0.09623466928799947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,0,1,float16,fp8,0,0.1013813316822052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,0,1,fp8,fp8,0,0.09551999966303508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,128,1,float16,float16,0,0.10098133484522502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,0,1,float16,float16,0,0.1011306643486023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,128,1,float16,fp8,0,0.10121599833170573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,128,1,fp8,fp8,0,0.09523199995358785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,0,1,float16,fp8,0,0.10140267014503479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,0,1,fp8,fp8,0,0.09524266918500264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,128,1,float16,float16,0,0.10288533568382263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,0,1,float16,float16,0,0.10248532891273499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,128,1,float16,fp8,0,0.10272000233332317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,128,1,fp8,fp8,0,0.09768533706665039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,0,1,float16,fp8,0,0.10142399867375691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,0,1,fp8,fp8,0,0.09709866841634114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,128,1,float16,float16,0,0.10408000151316325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,0,1,float16,float16,0,0.103493332862854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,128,1,float16,fp8,0,0.1030453344186147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,128,1,fp8,fp8,0,0.10063466429710388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,0,1,float16,fp8,0,0.10347732901573181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,0,1,fp8,fp8,0,0.099589337905248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,128,1,float16,float16,0,0.06218666831652323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,0,1,float16,float16,0,0.06398400167624156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,128,1,float16,fp8,0,0.062165334820747375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,128,1,fp8,fp8,0,0.06583466629187266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,0,1,float16,fp8,0,0.06213866670926412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,0,1,fp8,fp8,0,0.06647466619809468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,128,1,float16,float16,0,0.05903466542561849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,0,1,float16,float16,0,0.05834133426348368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,128,1,float16,fp8,0,0.060565332571665444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,128,1,fp8,fp8,0,0.0562720000743866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,0,1,float16,fp8,0,0.05880000193913778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,0,1,fp8,fp8,0,0.05793599784374237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,128,1,float16,float16,0,0.060234665870666504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,0,1,float16,float16,0,0.05831466615200043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,128,1,float16,fp8,0,0.05858666698137919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,128,1,fp8,fp8,0,0.05796800057093302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,0,1,float16,fp8,0,0.05992533266544342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,0,1,fp8,fp8,0,0.05620799958705902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,128,1,float16,float16,0,0.060309335589408875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,0,1,float16,float16,0,0.05996266504128774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,128,1,float16,fp8,0,0.058837334314982094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,128,1,fp8,fp8,0,0.05836800237496694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,0,1,float16,fp8,0,0.05964800218741099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,0,1,fp8,fp8,0,0.05681600173314413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,128,1,float16,float16,0,0.05857066810131073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,0,1,float16,float16,0,0.06018133461475372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,128,1,float16,fp8,0,0.060047999024391174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,128,1,fp8,fp8,0,0.057215998570124306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,0,1,float16,fp8,0,0.05793599784374237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,0,1,fp8,fp8,0,0.058415999015172325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,128,1,float16,float16,0,0.03965333352486292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,0,1,float16,float16,0,0.037808001041412354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,128,1,float16,fp8,0,0.03969600051641464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,128,1,fp8,fp8,0,0.03955733279387156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,0,1,float16,fp8,0,0.03823466598987579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,0,1,fp8,fp8,0,0.03956266740957896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,128,1,float16,float16,0,0.03756266583998998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,0,1,float16,float16,0,0.03741333385308584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,128,1,float16,fp8,0,0.03758399933576584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,128,1,fp8,fp8,0,0.03612799942493439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,0,1,float16,fp8,0,0.03774933268626531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,0,1,fp8,fp8,0,0.03604800005753835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,128,1,float16,float16,0,0.03741333385308584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,0,1,float16,float16,0,0.037530665596326195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,128,1,float16,fp8,0,0.038245332737763725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,128,1,fp8,fp8,0,0.036890665690104164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,0,1,float16,fp8,0,0.03764266769091288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,0,1,fp8,fp8,0,0.03737599899371465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,128,1,float16,float16,0,0.03752533346414566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,0,1,float16,float16,0,0.03779733429352442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,128,1,float16,fp8,0,0.03770666569471359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,128,1,fp8,fp8,0,0.037418665985266365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,0,1,float16,fp8,0,0.03774933268626531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,0,1,fp8,fp8,0,0.03668266783157984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,128,1,float16,float16,0,0.03749333322048187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,0,1,float16,float16,0,0.037503999968369804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,128,1,float16,fp8,0,0.03808533400297165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,128,1,fp8,fp8,0,0.03761066744724909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,0,1,float16,fp8,0,0.037861332297325134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,0,1,fp8,fp8,0,0.037461332976818085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,128,1,float16,float16,0,0.025279998779296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,0,1,float16,float16,0,0.025557334224383037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,128,1,float16,fp8,0,0.027349332968393963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,128,1,fp8,fp8,0,0.025386666258176167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,0,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,0,1,fp8,fp8,0,0.02739733209212621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,128,1,float16,float16,0,0.025349333882331848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,0,1,float16,float16,0,0.025226667523384094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,128,1,float16,fp8,0,0.025429333249727886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,128,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,0,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,128,1,float16,float16,0,0.025301332275072735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,0,1,float16,float16,0,0.025221332907676697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,128,1,float16,fp8,0,0.025279998779296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,0,1,float16,fp8,0,0.025349333882331848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,128,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,128,1,float16,float16,0,0.025514667232831318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,0,1,float16,float16,0,0.025349333882331848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,128,1,float16,fp8,0,0.02534399926662445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,128,1,fp8,fp8,0,0.025279998779296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,0,1,float16,fp8,0,0.02521066615978877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,0,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,128,1,float16,float16,0,0.02516266703605652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,0,1,float16,float16,0,0.025429333249727886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,128,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,128,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,0,1,float16,fp8,0,0.025594666600227356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,0,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,128,1,float16,float16,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,128,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,128,1,fp8,fp8,0,0.01961600035429001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,0,1,float16,float16,0,0.019925333559513092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,128,1,float16,float16,0,0.020010666300853092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,0,1,float16,float16,0,0.018160000443458557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,128,1,float16,fp8,0,0.018672000616788864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,128,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,0,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,0,1,float16,float16,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,128,1,float16,float16,0,0.01947733387351036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,128,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,128,1,float16,float16,0,0.018944000204404194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,0,1,float16,float16,0,0.019445333629846573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,128,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,128,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,0,1,fp8,fp8,0,0.01966933285196622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,128,1,float16,float16,0,0.01844800015290578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,0,1,float16,float16,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,128,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,128,1,float16,fp8,0,0.01942933350801468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,0,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,128,1,float16,float16,0,0.019466667125622433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,128,1,float16,fp8,0,0.018698666244745255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,0,1,float16,fp8,0,0.01945066700379054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,128,1,fp8,fp8,0,0.019498666127522785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,0,1,fp8,fp8,0,0.017525333911180496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,128,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,0,1,float16,float16,0,0.0176959993938605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,128,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,128,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,0,1,fp8,fp8,0,0.01752000053723653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,128,1,float16,float16,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,128,1,float16,fp8,0,0.018181333939234417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,0,1,fp8,fp8,0,0.017616000026464462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,128,1,float16,float16,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,128,1,float16,fp8,0,0.017477333545684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,0,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,128,1,float16,float16,0,0.5055359999338785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,0,1,float16,float16,0,0.5020480155944824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,128,1,float16,fp8,0,0.5020106633504232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,128,1,fp8,fp8,0,0.46795201301574707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,0,1,float16,fp8,0,0.502565344174703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,0,1,fp8,fp8,0,0.4689013163248698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,128,1,float16,float16,0,0.5017866690953573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,0,1,float16,float16,0,0.5015733242034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,128,1,float16,fp8,0,0.4982293446858724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,128,1,fp8,fp8,0,0.4722506602605184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,0,1,float16,fp8,0,0.5001866817474365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,0,1,fp8,fp8,0,0.47332266966501874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,128,1,float16,float16,0,0.5031520128250122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,0,1,float16,float16,0,0.5020800034205118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,128,1,float16,fp8,0,0.5000960032145182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,128,1,fp8,fp8,0,0.46860265731811523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,0,1,float16,fp8,0,0.501146674156189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,0,1,fp8,fp8,0,0.46714667479197186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,128,1,float16,float16,0,0.5046133200327555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,0,1,float16,float16,0,0.5056106646855673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,128,1,fp8,fp8,0,0.4774880011876424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,0,1,float16,fp8,0,0.5033440192540487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,128,1,float16,fp8,0,0.5050400098164877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,0,1,fp8,fp8,0,0.4788479804992676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,128,1,float16,float16,0,0.27405333518981934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,0,1,float16,float16,0,0.27267734209696454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,128,1,float16,fp8,0,0.27050666014353436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,128,1,fp8,fp8,0,0.26655999819437665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,0,1,float16,fp8,0,0.27027734120686847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,0,1,fp8,fp8,0,0.26627200841903687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,128,1,float16,float16,0,0.2609226703643799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,0,1,float16,float16,0,0.2612053354581197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,128,1,float16,fp8,0,0.25994666417439777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,128,1,fp8,fp8,0,0.24250666300455728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,0,1,float16,fp8,0,0.26016000906626385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,0,1,fp8,fp8,0,0.24320000410079956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,128,1,float16,float16,0,0.2592639923095703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,0,1,float16,float16,0,0.2608426610628764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,128,1,float16,fp8,0,0.25996800263722736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,128,1,fp8,fp8,0,0.24282666047414145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,0,1,float16,fp8,0,0.25944000482559204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,0,1,fp8,fp8,0,0.24434133370717367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,128,1,float16,float16,0,0.2592639923095703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,0,1,float16,float16,0,0.2598666747411092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,128,1,float16,fp8,0,0.25857067108154297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,128,1,fp8,fp8,0,0.2417866587638855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,0,1,fp8,fp8,0,0.241157333056132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,0,1,float16,fp8,0,0.2585013310114543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,128,1,float16,float16,0,0.2626346747080485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,0,1,float16,float16,0,0.26233067115147907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,128,1,float16,fp8,0,0.25982399781545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,128,1,fp8,fp8,0,0.24709866444269815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,0,1,fp8,fp8,0,0.24724799394607544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,0,1,float16,fp8,0,0.26071999470392865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,128,1,float16,float16,0,0.14428266882896423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,128,1,float16,fp8,0,0.14216533303260803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,0,1,float16,float16,0,0.14416000247001648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,128,1,fp8,fp8,0,0.1423679987589518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,0,1,float16,fp8,0,0.14251200358072916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,0,1,fp8,fp8,0,0.1425279974937439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,128,1,float16,float16,0,0.1362559994061788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,128,1,float16,fp8,0,0.13634666800498962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,128,1,fp8,fp8,0,0.12572800119717917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,0,1,float16,float16,0,0.13537599643071493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,0,1,fp8,fp8,0,0.12575466434160867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,0,1,float16,fp8,0,0.13621866703033447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,128,1,float16,float16,0,0.13588266571362814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,0,1,float16,float16,0,0.1365386644999186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,128,1,fp8,fp8,0,0.12692800164222717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,128,1,float16,fp8,0,0.13662933309872946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,0,1,float16,fp8,0,0.13753066460291544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,0,1,fp8,fp8,0,0.12780267000198364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,128,1,float16,float16,0,0.13714667161305746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,0,1,float16,float16,0,0.13613333304723105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,128,1,float16,fp8,0,0.13606933752695718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,128,1,fp8,fp8,0,0.12659733494122824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,0,1,float16,fp8,0,0.1362559994061788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,0,1,fp8,fp8,0,0.12786133090655008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,128,1,float16,float16,0,0.1381119986375173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,0,1,float16,float16,0,0.13760000467300415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,128,1,float16,fp8,0,0.1369653344154358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,128,1,fp8,fp8,0,0.13116799791653952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,128,1,float16,float16,0,0.08142399787902832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,0,1,float16,fp8,0,0.1376479963461558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,0,1,fp8,fp8,0,0.13059733311335245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,0,1,float16,float16,0,0.08078933258851369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,128,1,float16,fp8,0,0.07993599772453308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,128,1,fp8,fp8,0,0.081386665503184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,0,1,float16,fp8,0,0.08097066481908162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,0,1,fp8,fp8,0,0.0806826651096344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,128,1,float16,float16,0,0.07769066592057546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,0,1,float16,float16,0,0.07657599945863088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,128,1,float16,fp8,0,0.0765119989713033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,128,1,fp8,fp8,0,0.07256533205509186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,0,1,fp8,fp8,0,0.07247999807198842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,128,1,float16,float16,0,0.07694399853547414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,0,1,float16,float16,0,0.07662400106589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,0,1,float16,fp8,0,0.07763200004895528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,128,1,float16,fp8,0,0.07663466533025105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,128,1,fp8,fp8,0,0.07277333239714305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,0,1,float16,fp8,0,0.07658666869004567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,0,1,fp8,fp8,0,0.07311999797821045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,128,1,float16,float16,0,0.07690133154392242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,0,1,float16,float16,0,0.0775786687930425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,128,1,float16,fp8,0,0.0767680009206136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,128,1,fp8,fp8,0,0.07294400036334991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,0,1,float16,fp8,0,0.07687999804814656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,0,1,fp8,fp8,0,0.07249066730340321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,128,1,float16,float16,0,0.07717866698900859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,0,1,float16,float16,0,0.07712000111738841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,128,1,float16,fp8,0,0.07734933495521545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,128,1,fp8,fp8,0,0.07283199826876323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,0,1,float16,fp8,0,0.0772213339805603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,128,1,float16,float16,0,0.04807466765244802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,0,1,float16,float16,0,0.04610133171081543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,128,1,float16,fp8,0,0.04814933240413666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,0,1,fp8,fp8,0,0.07369066774845123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,128,1,fp8,fp8,0,0.045978665351867676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,0,1,float16,fp8,0,0.04609066744645437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,0,1,fp8,fp8,0,0.046165332198143005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,128,1,float16,float16,0,0.04607999821503957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,0,1,float16,float16,0,0.045850664377212524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,128,1,float16,fp8,0,0.04593066871166229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,128,1,fp8,fp8,0,0.044250667095184326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,0,1,float16,fp8,0,0.04621866842110952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,0,1,fp8,fp8,0,0.04370133578777313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,128,1,float16,float16,0,0.04624533156553904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,0,1,float16,float16,0,0.045738667249679565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,128,1,float16,fp8,0,0.04596266647179922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,128,1,fp8,fp8,0,0.044154668847719826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,0,1,float16,fp8,0,0.04613866905371348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,0,1,fp8,fp8,0,0.04378666480382284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,128,1,float16,float16,0,0.045893331368764244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,0,1,float16,float16,0,0.04611733555793762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,128,1,float16,fp8,0,0.046015997727712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,128,1,fp8,fp8,0,0.04383466641108195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,0,1,float16,fp8,0,0.04606399933497111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,0,1,fp8,fp8,0,0.04374399781227112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,128,1,float16,float16,0,0.04576533536116282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,0,1,float16,float16,0,0.04563733438650767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,128,1,float16,fp8,0,0.045696000258127846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,128,1,fp8,fp8,0,0.04378666480382284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,0,1,float16,fp8,0,0.046037331223487854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,128,1,float16,float16,0,0.029605334003766377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,0,1,fp8,fp8,0,0.04403733213742574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,0,1,float16,float16,0,0.03155199935038885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,128,1,float16,fp8,0,0.03137599925200144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,128,1,fp8,fp8,0,0.02959466725587845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,0,1,float16,fp8,0,0.0315786674618721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,0,1,fp8,fp8,0,0.031386665999889374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,128,1,float16,float16,0,0.03136533250411352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,0,1,float16,float16,0,0.0315733328461647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,128,1,float16,fp8,0,0.03126399964094162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,128,1,fp8,fp8,0,0.029322666426499683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,0,1,float16,fp8,0,0.029648000995318096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,0,1,fp8,fp8,0,0.02921066681543986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,128,1,float16,float16,0,0.03151999910672506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,128,1,float16,fp8,0,0.02942933390537898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,0,1,float16,float16,0,0.029461334149042766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,128,1,fp8,fp8,0,0.029535998900731403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,0,1,float16,fp8,0,0.03142400085926056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,0,1,fp8,fp8,0,0.029546665648619335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,128,1,float16,float16,0,0.029552000264326733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,0,1,float16,float16,0,0.031354665756225586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,128,1,fp8,fp8,0,0.029274667302767437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,128,1,float16,fp8,0,0.03133866687615713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,0,1,float16,fp8,0,0.03150933235883713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,0,1,fp8,fp8,0,0.029215998947620392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,128,1,float16,float16,0,0.02938666691382726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,0,1,float16,float16,0,0.03136000037193298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,128,1,float16,fp8,0,0.02959999938805898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,128,1,fp8,fp8,0,0.029279999434947968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,0,1,fp8,fp8,0,0.02959466725587845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,0,1,float16,fp8,0,0.029578665892283123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,128,1,float16,float16,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,0,1,float16,float16,0,0.023082666099071503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,128,1,float16,fp8,0,0.021530665457248688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,128,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,0,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,0,1,fp8,fp8,0,0.02149333308140437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,128,1,float16,float16,0,0.021456000705560047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,0,1,float16,float16,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,128,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,128,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,0,1,float16,fp8,0,0.022458667556444805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,0,1,fp8,fp8,0,0.021386665602525074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,128,1,float16,float16,0,0.021189334491888683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,0,1,float16,float16,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,128,1,float16,fp8,0,0.023018665611743927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,128,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,0,1,float16,fp8,0,0.0225600004196167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,0,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,128,1,float16,float16,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,0,1,float16,float16,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,128,1,float16,fp8,0,0.02179199953873952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,128,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,0,1,float16,fp8,0,0.02149333308140437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,0,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,128,1,float16,float16,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,0,1,float16,float16,0,0.02117866774400075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,128,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,128,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,0,1,float16,fp8,0,0.021290667355060577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,0,1,fp8,fp8,0,0.02163200080394745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,128,1,float16,float16,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,128,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,128,1,fp8,fp8,0,0.01865600049495697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,0,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,128,1,float16,float16,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,0,1,float16,float16,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,128,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,128,1,float16,fp8,0,0.01754133279124896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,128,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,128,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,0,1,float16,fp8,0,0.01821333294113477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,0,1,fp8,fp8,0,0.017664000391960144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,128,1,float16,fp8,0,0.018751999984184902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,128,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,0,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,128,1,float16,float16,0,0.016575999557971954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,128,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,128,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,0,1,float16,fp8,0,0.01785600061217944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,128,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,128,1,float16,float16,0,0.01594666639963786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,128,1,fp8,fp8,0,0.01661866654952367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,128,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,0,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,128,1,float16,float16,0,0.4047893285751343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,0,1,float16,float16,0,0.40328001976013184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,128,1,float16,fp8,0,0.4019999901453654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,128,1,fp8,fp8,0,0.3672800064086914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,0,1,float16,fp8,0,0.4024053414662679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,0,1,fp8,fp8,0,0.3662666479746501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,128,1,float16,float16,0,0.40300265947977704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,0,1,float16,float16,0,0.4025226831436157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,128,1,float16,fp8,0,0.40094931920369464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,128,1,fp8,fp8,0,0.3672106663386027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,0,1,float16,fp8,0,0.40189866224924725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,0,1,fp8,fp8,0,0.36821866035461426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,128,1,float16,float16,0,0.4030453364054362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,0,1,float16,float16,0,0.40250666936238605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,128,1,float16,fp8,0,0.4013226826985677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,128,1,fp8,fp8,0,0.365994652112325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,0,1,fp8,fp8,0,0.3656586805979411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,0,1,float16,fp8,0,0.4010560115178426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,128,1,float16,float16,0,0.40440531571706134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,0,1,float16,float16,0,0.4023466507593791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,128,1,float16,fp8,0,0.40241066614786786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,128,1,fp8,fp8,0,0.37004268169403076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,0,1,fp8,fp8,0,0.3713119824727376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,0,1,float16,fp8,0,0.40117335319519043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,128,1,float16,float16,0,0.21565866470336914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,0,1,float16,float16,0,0.214410662651062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,128,1,float16,fp8,0,0.21427732706069946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,128,1,fp8,fp8,0,0.20429333051045737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,0,1,float16,fp8,0,0.2140480081240336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,0,1,fp8,fp8,0,0.20424532890319824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,128,1,float16,float16,0,0.20881066719690958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,0,1,float16,float16,0,0.20813866456349692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,128,1,fp8,fp8,0,0.18760534127553305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,128,1,float16,fp8,0,0.20781866709391275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,0,1,float16,fp8,0,0.20809600750605264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,0,1,fp8,fp8,0,0.18858667214711508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,128,1,float16,float16,0,0.20777066548665366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,0,1,float16,float16,0,0.20868800083796182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,128,1,fp8,fp8,0,0.18906132380167642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,128,1,float16,fp8,0,0.2076639930407206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,0,1,float16,fp8,0,0.20779200394948324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,128,1,float16,float16,0,0.2080693244934082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,0,1,fp8,fp8,0,0.18952532609303793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,0,1,float16,float16,0,0.2095306714375814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,128,1,float16,fp8,0,0.20759467283884683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,128,1,fp8,fp8,0,0.18793600797653198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,0,1,float16,fp8,0,0.20726933081944784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,0,1,fp8,fp8,0,0.18904000520706177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,128,1,float16,float16,0,0.21280533075332642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,0,1,float16,float16,0,0.20917334159215292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,128,1,float16,fp8,0,0.20817599693934122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,128,1,fp8,fp8,0,0.19190933307011923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,0,1,float16,fp8,0,0.20874667167663574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,0,1,fp8,fp8,0,0.19131733973821005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,128,1,float16,float16,0,0.11581333478291829
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,0,1,float16,float16,0,0.11596799890200298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,128,1,float16,fp8,0,0.11391466856002808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,128,1,fp8,fp8,0,0.10969066619873047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,0,1,float16,fp8,0,0.11405866344769795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,0,1,fp8,fp8,0,0.11083199580510457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,128,1,float16,float16,0,0.11167466640472412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,0,1,float16,float16,0,0.11131733655929565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,128,1,float16,fp8,0,0.11145066221555074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,0,1,float16,fp8,0,0.11198932925860088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,0,1,fp8,fp8,0,0.10188800096511841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,128,1,fp8,fp8,0,0.10357333223025005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,128,1,float16,float16,0,0.11191466450691223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,0,1,float16,float16,0,0.11195733149846394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,128,1,float16,fp8,0,0.11020800471305847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,128,1,fp8,fp8,0,0.10335466265678406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,0,1,float16,fp8,0,0.11054933071136475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,0,1,fp8,fp8,0,0.102101335922877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,128,1,float16,float16,0,0.11008532842000325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,0,1,float16,float16,0,0.1111253301302592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,128,1,float16,fp8,0,0.11045866211255391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,128,1,fp8,fp8,0,0.10124799609184265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,0,1,float16,fp8,0,0.11179733276367188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,0,1,fp8,fp8,0,0.1014453371365865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,128,1,float16,float16,0,0.11129599809646606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,0,1,float16,float16,0,0.11141866445541382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,128,1,float16,fp8,0,0.11075733105341594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,128,1,fp8,fp8,0,0.1032426655292511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,0,1,float16,fp8,0,0.11002133289972942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,128,1,float16,float16,0,0.06459199885527293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,0,1,fp8,fp8,0,0.10400533676147461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,0,1,float16,float16,0,0.06474133332570393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,128,1,float16,fp8,0,0.06447466711203258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,128,1,fp8,fp8,0,0.06213866670926412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,0,1,float16,fp8,0,0.06443200012048085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,0,1,fp8,fp8,0,0.061664000153541565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,128,1,float16,float16,0,0.06413866579532623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,0,1,float16,float16,0,0.06305066744486491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,128,1,float16,fp8,0,0.06244266529877981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,128,1,fp8,fp8,0,0.06041066845258077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,0,1,float16,fp8,0,0.06423999865849812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,0,1,fp8,fp8,0,0.0602453351020813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,128,1,float16,float16,0,0.06337066491444905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,128,1,fp8,fp8,0,0.05971199770768484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,128,1,float16,fp8,0,0.06423999865849812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,0,1,float16,float16,0,0.0641653339068095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,0,1,float16,fp8,0,0.06380266447861989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,0,1,fp8,fp8,0,0.05910933514436086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,128,1,float16,float16,0,0.0634080022573471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,0,1,float16,float16,0,0.06299200157324474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,128,1,float16,fp8,0,0.06267199913660686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,128,1,fp8,fp8,0,0.06004266440868378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,0,1,float16,fp8,0,0.06420266628265381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,0,1,fp8,fp8,0,0.05896000067392985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,128,1,float16,float16,0,0.06418133278687795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,0,1,float16,float16,0,0.06419200201829274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,128,1,float16,fp8,0,0.0624533345301946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,128,1,fp8,fp8,0,0.06084266801675161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,0,1,float16,fp8,0,0.06390400230884552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,0,1,fp8,fp8,0,0.06031466523806254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,128,1,float16,float16,0,0.03956799954175949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,0,1,float16,float16,0,0.037808001041412354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,128,1,float16,fp8,0,0.039647998909155525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,128,1,fp8,fp8,0,0.03766933331886927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,0,1,float16,fp8,0,0.039546666045983635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,0,1,fp8,fp8,0,0.037808001041412354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,128,1,float16,float16,0,0.03770666569471359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,0,1,float16,float16,0,0.03842133283615112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,128,1,float16,fp8,0,0.037871999045213066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,128,1,fp8,fp8,0,0.03755733370780945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,0,1,float16,fp8,0,0.039103999733924866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,0,1,fp8,fp8,0,0.035562666753927864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,128,1,float16,float16,0,0.03760000069936117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,0,1,float16,float16,0,0.03850133220354716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,128,1,float16,fp8,0,0.037605332831541695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,128,1,fp8,fp8,0,0.035674666364987694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,0,1,float16,fp8,0,0.03750933210055033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,0,1,fp8,fp8,0,0.0374293327331543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,128,1,float16,float16,0,0.03759466608365377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,0,1,float16,float16,0,0.03745066622893015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,128,1,float16,fp8,0,0.03950933367013931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,128,1,fp8,fp8,0,0.035775999228159584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,0,1,float16,fp8,0,0.03950933367013931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,0,1,fp8,fp8,0,0.03783999880154928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,128,1,float16,float16,0,0.03814399987459183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,0,1,float16,float16,0,0.03937066594759623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,128,1,float16,fp8,0,0.039434666434923805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,128,1,fp8,fp8,0,0.03751466671625773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,0,1,float16,fp8,0,0.03745066622893015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,0,1,fp8,fp8,0,0.03678400069475174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,128,1,float16,float16,0,0.02719466636578242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,0,1,float16,float16,0,0.027317332724730175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,128,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,128,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,0,1,float16,fp8,0,0.027136000494162243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,0,1,fp8,fp8,0,0.027285332481066387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,128,1,float16,float16,0,0.027232001225153606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,0,1,float16,float16,0,0.02569599946339925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,128,1,float16,fp8,0,0.027424000203609467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,128,1,fp8,fp8,0,0.025472000241279602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,0,1,float16,fp8,0,0.02717866748571396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,0,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,128,1,float16,float16,0,0.02731200059254964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,0,1,float16,float16,0,0.027274665733178455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,128,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,128,1,float16,fp8,0,0.026858667532602947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,0,1,float16,fp8,0,0.027269333600997925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,0,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,128,1,float16,float16,0,0.025578667720158894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,0,1,float16,float16,0,0.0252960001428922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,128,1,fp8,fp8,0,0.025407999753952026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,0,1,float16,fp8,0,0.026357332865397137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,128,1,float16,fp8,0,0.028250666956106823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,0,1,fp8,fp8,0,0.027258666853109997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,128,1,float16,float16,0,0.02722666660944621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,0,1,float16,float16,0,0.027402666707833607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,128,1,fp8,fp8,0,0.02716800073782603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,128,1,float16,fp8,0,0.02824000020821889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,0,1,float16,fp8,0,0.02741333345572154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,0,1,fp8,fp8,0,0.027263998985290527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,128,1,float16,float16,0,0.019760000209013622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,0,1,float16,float16,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,128,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,0,1,float16,fp8,0,0.019850666324297588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,128,1,fp8,fp8,0,0.019621333728233974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,0,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,128,1,float16,float16,0,0.01932799940307935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,128,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,0,1,float16,float16,0,0.021503999829292297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,0,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,0,1,fp8,fp8,0,0.020794666061798733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,128,1,float16,float16,0,0.021055998901526134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,0,1,float16,float16,0,0.019007999449968338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,128,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,128,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,0,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,128,1,float16,float16,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,128,1,float16,fp8,0,0.019381333142518997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,128,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,0,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,0,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,128,1,float16,float16,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,128,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,128,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,0,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,0,1,fp8,fp8,0,0.02091199904680252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,128,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,0,1,float16,float16,0,0.01657066618402799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,128,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,128,1,float16,float16,0,0.016336000214020412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,0,1,float16,float16,0,0.017498667041460674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,128,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,128,1,fp8,fp8,0,0.016293333222468693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,128,1,float16,float16,0,0.015530666957298914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,0,1,float16,float16,0,0.0186666672428449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,128,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,128,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,128,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,0,1,float16,float16,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,128,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,128,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,128,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,0,1,fp8,fp8,0,0.016058667252461117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,0,1,float16,float16,0,0.016549333930015564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,128,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,0,1,fp8,fp8,0,0.016415999581416447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,128,1,float16,float16,0,0.01498666654030482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,128,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,0,1,fp8,fp8,0,0.016613333175579708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,128,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,0,1,float16,float16,0,0.016117333124081295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,128,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,128,1,float16,float16,0,0.35073598225911456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,0,1,float16,float16,0,0.34914131959279376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,128,1,float16,fp8,0,0.34915733337402344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,128,1,fp8,fp8,0,0.3154720067977905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,0,1,float16,fp8,0,0.34776532649993896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,0,1,fp8,fp8,0,0.31574400266011554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,128,1,float16,float16,0,0.3489333391189575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,0,1,float16,float16,0,0.349727988243103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,128,1,float16,fp8,0,0.3504106601079305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,128,1,fp8,fp8,0,0.3146559993426005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,0,1,float16,fp8,0,0.3487199942270915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,0,1,fp8,fp8,0,0.31513067086537677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,128,1,float16,float16,0,0.349455992380778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,0,1,float16,float16,0,0.3496640125910441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,128,1,float16,fp8,0,0.34969600041707355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,128,1,fp8,fp8,0,0.31546666224797565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,0,1,float16,fp8,0,0.34904531637827557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,0,1,fp8,fp8,0,0.3162506620089213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,128,1,float16,float16,0,0.3500853379567464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,0,1,float16,float16,0,0.35077865918477374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,128,1,fp8,fp8,0,0.3193653424580892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,128,1,float16,fp8,0,0.34902934233347577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,0,1,float16,fp8,0,0.350874662399292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,128,1,float16,float16,0,0.18371200561523438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,0,1,float16,float16,0,0.1853653391202291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,0,1,fp8,fp8,0,0.31750933329264325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,128,1,float16,fp8,0,0.18331199884414673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,128,1,fp8,fp8,0,0.1749173402786255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,0,1,float16,fp8,0,0.18345600366592407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,0,1,fp8,fp8,0,0.17488000790278116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,128,1,float16,float16,0,0.18125333388646445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,0,1,float16,float16,0,0.18125333388646445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,128,1,float16,fp8,0,0.1813973387082418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,128,1,fp8,fp8,0,0.16546666622161865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,0,1,float16,fp8,0,0.18123199542363486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,0,1,fp8,fp8,0,0.16532267133394876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,128,1,float16,float16,0,0.18121600151062012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,0,1,float16,float16,0,0.1812373399734497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,128,1,fp8,fp8,0,0.16545066237449646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,128,1,float16,fp8,0,0.18092799186706543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,0,1,float16,fp8,0,0.1811573306719462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,0,1,fp8,fp8,0,0.16674133141835532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,128,1,float16,float16,0,0.18138132492701212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,0,1,float16,float16,0,0.17986132701237997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,128,1,float16,fp8,0,0.18148267269134521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,128,1,fp8,fp8,0,0.1667893330256144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,0,1,float16,fp8,0,0.1795253356297811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,0,1,fp8,fp8,0,0.16685332854588827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,128,1,float16,float16,0,0.1811093290646871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,0,1,float16,float16,0,0.1796906590461731
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,128,1,fp8,fp8,0,0.1674399971961975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,128,1,float16,fp8,0,0.18226132790247598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,0,1,float16,fp8,0,0.1811359922091166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,0,1,fp8,fp8,0,0.16552533706029257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,128,1,float16,float16,0,0.09921600421269734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,128,1,float16,fp8,0,0.09860799709955852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,0,1,float16,float16,0,0.09913600484530131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,128,1,fp8,fp8,0,0.09290132919947307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,0,1,float16,fp8,0,0.09907199939092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,0,1,fp8,fp8,0,0.09286399682362874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,128,1,float16,float16,0,0.09799466530481975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,0,1,float16,float16,0,0.0971999963124593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,128,1,float16,fp8,0,0.09943466385205586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,128,1,fp8,fp8,0,0.09045867125193278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,0,1,float16,fp8,0,0.09725866715113322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,128,1,float16,float16,0,0.09909866253534953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,0,1,fp8,fp8,0,0.09006933371225993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,0,1,float16,float16,0,0.09718933701515198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,128,1,float16,fp8,0,0.09724799791971843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,128,1,fp8,fp8,0,0.09105599919954936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,0,1,fp8,fp8,0,0.09082667032877605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,0,1,float16,fp8,0,0.0978613297144572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,128,1,float16,float16,0,0.09719467163085938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,0,1,float16,float16,0,0.09749866525332133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,128,1,float16,fp8,0,0.09782933195432027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,128,1,fp8,fp8,0,0.09122133255004883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,0,1,float16,fp8,0,0.09735999504725139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,0,1,fp8,fp8,0,0.09093333284060161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,128,1,float16,float16,0,0.09730666875839233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,0,1,float16,float16,0,0.09851200381914775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,128,1,float16,fp8,0,0.09717333316802979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,128,1,fp8,fp8,0,0.09077866872151692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,0,1,float16,fp8,0,0.09820266564687093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,128,1,float16,float16,0,0.0574186642964681
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,0,1,fp8,fp8,0,0.09112000465393066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,0,1,float16,float16,0,0.05805333455403646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,128,1,float16,fp8,0,0.0561653325955073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,128,1,fp8,fp8,0,0.05403199791908264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,0,1,float16,fp8,0,0.05786666770776113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,0,1,fp8,fp8,0,0.05392533540725708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,128,1,float16,float16,0,0.05666666726271311
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,0,1,float16,float16,0,0.05586666862169901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,128,1,float16,fp8,0,0.05659733215967814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,128,1,fp8,fp8,0,0.05388799806435903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,0,1,float16,fp8,0,0.056421334544817604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,0,1,fp8,fp8,0,0.05217599868774414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,0,1,float16,float16,0,0.05649599929650625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,128,1,float16,float16,0,0.05594133337338766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,128,1,float16,fp8,0,0.056549335519472756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,128,1,fp8,fp8,0,0.05208000044027964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,0,1,float16,fp8,0,0.05599466462930044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,0,1,fp8,fp8,0,0.0539680023988088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,128,1,float16,float16,0,0.05612266560395559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,0,1,float16,float16,0,0.056405335664749146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,128,1,float16,fp8,0,0.05603733162085215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,128,1,fp8,fp8,0,0.054154664278030396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,0,1,float16,fp8,0,0.056176001826922096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,0,1,fp8,fp8,0,0.05420266588528951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,128,1,float16,float16,0,0.05621333420276642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,0,1,float16,float16,0,0.0561653325955073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,128,1,float16,fp8,0,0.056186666091283165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,128,1,fp8,fp8,0,0.052815998593966164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,0,1,float16,fp8,0,0.056186666091283165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,0,1,fp8,fp8,0,0.053930665055910744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,128,1,float16,float16,0,0.0355679988861084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,0,1,float16,float16,0,0.0354720006386439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,128,1,float16,fp8,0,0.03369066615899404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,128,1,fp8,fp8,0,0.03373866776625315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,0,1,float16,fp8,0,0.03594133257865906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,0,1,fp8,fp8,0,0.033600000043710075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,128,1,float16,float16,0,0.03349333256483078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,0,1,float16,float16,0,0.03401600072781245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,128,1,float16,fp8,0,0.03336533407370249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,128,1,fp8,fp8,0,0.03326933334271113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,0,1,float16,fp8,0,0.03387733300526937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,0,1,fp8,fp8,0,0.031957333286603294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,128,1,float16,float16,0,0.03562133262554804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,0,1,float16,float16,0,0.03532266616821289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,128,1,float16,fp8,0,0.035360001027584076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,128,1,fp8,fp8,0,0.033413333197434746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,0,1,float16,fp8,0,0.03552533437808355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,0,1,fp8,fp8,0,0.033402666449546814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,128,1,float16,float16,0,0.035546667873859406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,0,1,float16,float16,0,0.03554133325815201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,128,1,float16,fp8,0,0.03344533344109853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,128,1,fp8,fp8,0,0.03365333378314972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,0,1,float16,fp8,0,0.03557866563399633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,0,1,fp8,fp8,0,0.03293866664171219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,128,1,float16,float16,0,0.03374933451414108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,0,1,float16,float16,0,0.03376533339420954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,128,1,float16,fp8,0,0.03541333228349686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,128,1,fp8,fp8,0,0.03329066683848699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,0,1,float16,fp8,0,0.03376533339420954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,0,1,fp8,fp8,0,0.03338133295377096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,128,1,float16,float16,0,0.02515733242034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,0,1,float16,float16,0,0.025514667232831318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,128,1,float16,fp8,0,0.02550400048494339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,128,1,fp8,fp8,0,0.024688000480333965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,0,1,float16,fp8,0,0.02533866713444392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,0,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,128,1,float16,float16,0,0.025610665480295818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,0,1,float16,float16,0,0.025397333006064098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,128,1,float16,fp8,0,0.02516799916823705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,128,1,fp8,fp8,0,0.025018667181332905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,0,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,0,1,fp8,fp8,0,0.0234400009115537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,128,1,float16,float16,0,0.025461333493391674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,0,1,float16,float16,0,0.025061334172884624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,128,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,128,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,0,1,float16,fp8,0,0.02553066611289978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,0,1,fp8,fp8,0,0.02349333216746648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,128,1,float16,float16,0,0.025087999800841015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,0,1,float16,float16,0,0.025418666501839954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,128,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,128,1,fp8,fp8,0,0.023189333577950794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,0,1,float16,fp8,0,0.025546667476495106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,0,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,128,1,float16,float16,0,0.025216000775496166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,0,1,float16,float16,0,0.025642665723959606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,128,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,128,1,fp8,fp8,0,0.025226667523384094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,0,1,float16,fp8,0,0.025648000339667004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,0,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,128,1,float16,float16,0,0.018863999595244724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,0,1,float16,float16,0,0.019215999792019527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,128,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,128,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,0,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,0,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,128,1,float16,float16,0,0.019061333189407986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,128,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,128,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,0,1,float16,fp8,0,0.019434666881958645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,0,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,128,1,float16,float16,0,0.019248000035683315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,0,1,float16,float16,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,128,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,128,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,128,1,float16,float16,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,128,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,128,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,0,1,float16,fp8,0,0.01956266661485036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,0,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,128,1,float16,float16,0,0.019274666905403137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,128,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,128,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,0,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,128,1,float16,float16,0,0.015360000232855478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,128,1,fp8,fp8,0,0.01594666639963786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,0,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,128,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,0,1,float16,float16,0,0.01758933315674464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,128,1,float16,float16,0,0.017616000026464462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,128,1,fp8,fp8,0,0.017525333911180496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,128,1,float16,float16,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,128,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,0,1,float16,float16,0,0.01791999985774358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,0,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,128,1,float16,float16,0,0.016837333639462788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,128,1,float16,fp8,0,0.017792000124851864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,128,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,0,1,float16,float16,0,0.015482666591803232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,128,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,128,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,128,1,float16,float16,0,0.015082667271296183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,0,1,float16,fp8,0,0.016480000068744022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,128,1,float16,float16,0,0.016821333517630894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,128,1,float16,fp8,0,0.015477333217859268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,0,1,float16,float16,0,0.015376000354687372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,128,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,128,1,float16,float16,0,0.015392000476519266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,128,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,0,0.2964853247006734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,0,0.29604800542195636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,0,0.2961759964625041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,64,128,1,fp8,fp8,0,0.2694666584332784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,0,0.2957493265469869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,64,0,1,fp8,fp8,0,0.2704906662305196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,0,0.29647467533747357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,0,0.2949440081914266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,0,0.2964426676432292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,64,128,1,fp8,fp8,0,0.26942400137583417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,0,0.29630400737126666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,64,0,1,fp8,fp8,0,0.26918933788935345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,0,0.2959199945131938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,0,0.2961120009422302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,0,0.29627732435862225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,64,128,1,fp8,fp8,0,0.2710346579551697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,0,0.296015997727712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,64,0,1,fp8,fp8,0,0.2690666715304057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,0,0.2964586615562439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,0,0.2955733338991801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,0,0.29604800542195636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,64,128,1,fp8,fp8,0,0.27129600445429486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,64,128,1,float16,float16,0,0.15477333466211954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,0,0.29630933205286664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,64,0,1,fp8,fp8,0,0.26920533180236816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,0,0.15501333276430765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,64,128,1,float16,fp8,0,0.15556266903877258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,64,128,1,fp8,fp8,0,0.14243200421333313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,0,0.15435199936230978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,64,0,1,fp8,fp8,0,0.14241600036621094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,0,0.15480533242225647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,0,0.15449066956837973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,0,0.15432533621788025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,64,0,1,fp8,fp8,0,0.14204266667366028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,0,0.15456533432006836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,64,128,1,fp8,fp8,0,0.1423360009988149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,0,0.15442132949829102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,0,0.15477866927782694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,0,0.1546239952246348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,64,128,1,fp8,fp8,0,0.1418239971001943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,0,0.15448533495267233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,64,0,1,fp8,fp8,0,0.141893337170283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,0,0.15453867117563883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,0,0.15467199683189392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,0,0.1547040045261383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,64,128,1,fp8,fp8,0,0.14217600226402283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,0,0.15471999843915304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,64,0,1,fp8,fp8,0,0.1421280006567637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,0,0.15471466382344565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,0,0.15471999843915304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,0,0.15461867054303488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,64,128,1,fp8,fp8,0,0.14197333653767905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,0,0.15476800004641214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,64,0,1,fp8,fp8,0,0.14218133687973022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,64,128,1,float16,float16,0,0.08462400237719218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,0,0.08366400003433228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,64,128,1,float16,fp8,0,0.0849120020866394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,64,128,1,fp8,fp8,0,0.07879999776681264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,0,0.08454400300979614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,64,0,1,fp8,fp8,0,0.07670400043328603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,0,0.08495466907819112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,0,0.08496532837549846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,0,0.08480000495910645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,64,128,1,fp8,fp8,0,0.07890666524569194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,0,0.08516266942024231
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,64,0,1,fp8,fp8,0,0.0786186655362447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,0,0.08468266328175862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,0,0.084714670976003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,0,0.08483200271924336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,64,128,1,fp8,fp8,0,0.07868800063927968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,0,0.0846560001373291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,64,0,1,fp8,fp8,0,0.07684800028800964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,0,0.08463467160860698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,0,0.08482666810353597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,0,0.08477333188056946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,64,128,1,fp8,fp8,0,0.07796266674995422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,0,0.0849173367023468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,64,0,1,fp8,fp8,0,0.07671999931335449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,0,0.08475200335184734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,0,0.0851093331972758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,0,0.08482133348782857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,0,0.08506666620572408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,64,128,1,fp8,fp8,0,0.0768746683994929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,64,0,1,fp8,fp8,0,0.07870933413505554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,64,128,1,float16,float16,0,0.04979733129342397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,0,0.04987200101216634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,64,128,1,float16,fp8,0,0.050053333242734276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,0,0.05188799897829691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,64,128,1,fp8,fp8,0,0.048341333866119385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,64,0,1,fp8,fp8,0,0.047685335079828896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,0,0.05036800106366476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,0,0.05187733471393585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,0,0.050250664353370667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,64,128,1,fp8,fp8,0,0.04594666759173075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,64,0,1,fp8,fp8,0,0.045968001087506614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,0,0.05011733373006185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,0,0.04974933465321859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,0,0.049728001157442726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,0,0.04996266464392344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,64,128,1,fp8,fp8,0,0.04786666731039683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,0,0.04990933338801066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,64,0,1,fp8,fp8,0,0.046426668763160706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,0,0.05013866722583771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,0,0.050010666251182556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,0,0.05056533217430115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,64,128,1,fp8,fp8,0,0.04799466828505198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,0,0.05011733373006185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,64,0,1,fp8,fp8,0,0.048154667019844055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,0,0.05006400247414907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,0,0.05011733373006185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,0,0.04991999765237173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,64,128,1,fp8,fp8,0,0.04643733302752177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,0,0.05016533533732096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,64,0,1,fp8,fp8,0,0.046165332198143005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,64,128,1,float16,float16,0,0.03155199935038885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,0,0.031727999448776245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,64,128,1,float16,fp8,0,0.033600000043710075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,64,128,1,fp8,fp8,0,0.029359998802344005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,0,0.03356799980004629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,0,0.03130666663249334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,64,0,1,fp8,fp8,0,0.031397332747777305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,0,0.03161599983771642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,0,0.03336533407370249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,64,128,1,fp8,fp8,0,0.03145600110292435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,0,0.03350933392842611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,64,0,1,fp8,fp8,0,0.031541332602500916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,0,0.03336533407370249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,0,0.03194666653871536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,64,128,1,fp8,fp8,0,0.031301334500312805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,0,0.03405333310365677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,0,0.03189333279927572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,64,0,1,fp8,fp8,0,0.02975466599067052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,0,0.033402666449546814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,0,0.03165333221356074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,64,128,1,fp8,fp8,0,0.031354665756225586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,0,0.03306666761636734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,0,0.03147733211517334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,64,0,1,fp8,fp8,0,0.02958400050799052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,0,0.03130666663249334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,0,0.03222399950027466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,0,0.0315733328461647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,64,128,1,fp8,fp8,0,0.029733332494894665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,0,0.03141333411137263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,64,128,1,float16,float16,0,0.025216000775496166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,64,0,1,fp8,fp8,0,0.031290667752424874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,0,0.025199999411900837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,64,128,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,64,128,1,fp8,fp8,0,0.023887999355793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,0,0.025077333052953083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,64,0,1,fp8,fp8,0,0.02317333221435547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,0,0.024720000723997753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,0,0.02587733417749405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,0,0.023520000278949738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,64,128,1,fp8,fp8,0,0.023706667125225067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,0,0.025621332228183746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,64,0,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,0,0.023226665953795116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,0,0.02314666658639908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,0,0.02532800038655599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,64,128,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,0,0.02499733368555705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,64,0,1,fp8,fp8,0,0.0232640008131663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,0,0.02402133246262868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,0,0.02518400053183238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,64,128,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,0,0.023391999304294586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,64,0,1,fp8,fp8,0,0.023365333676338196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,0,0.023386667172114056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,0,0.023232000569502514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,64,128,1,fp8,fp8,0,0.02310933421055476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,0,0.025205334027608235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,64,0,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,64,128,1,float16,float16,0,0.019018666197856266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,64,128,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,64,128,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,64,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,64,128,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,64,0,1,fp8,fp8,0,0.01950399950146675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,0,0.019093333433071773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,64,128,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,0,0.01942933350801468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,64,0,1,fp8,fp8,0,0.018842666099468868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,0,0.019440000255902607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,64,128,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,64,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,0,0.01915733392039935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,0,0.020261333634455998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,64,128,1,fp8,fp8,0,0.019519999623298645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,64,0,1,fp8,fp8,0,0.019472000499566395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,64,128,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,0,0.015317333241303762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,64,128,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,64,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,0,0.015520000209410986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,64,128,1,fp8,fp8,0,0.015418666104475657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,64,0,1,fp8,fp8,0,0.015546667079130808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,64,128,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,64,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,64,0,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,64,128,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,64,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,64,128,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,0,0.016794666647911072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,64,128,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,64,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,64,128,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,64,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,0,0.01676799977819125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,64,128,1,fp8,fp8,0,0.016773333152135212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,64,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,0,0.017968000223239262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,0,0.01756799966096878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,64,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,128,1,float16,float16,0,2.7130187352498374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,128,1,float16,fp8,0,2.7336158752441406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,128,1,fp8,fp8,0,2.4858080546061196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,128,1,float16,float16,0,2.732442537943522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,0,1,float16,float16,0,16.679210662841797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,0,1,float16,fp8,0,16.239866892496746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,0,1,fp8,fp8,0,14.730448404947916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,128,1,float16,fp8,0,2.7544425328572593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,128,1,fp8,fp8,0,2.5114026069641113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,128,1,float16,float16,0,2.7691520055135093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,0,1,float16,float16,0,16.930213928222656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,128,1,float16,fp8,0,2.7894773483276367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,128,1,fp8,fp8,0,2.5552213986714682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,0,1,fp8,fp8,0,14.751263936360678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,0,1,float16,fp8,0,15.955268859863281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,128,1,float16,float16,0,1.5878987312316895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,0,1,float16,float16,0,16.33286412556966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,128,1,float16,fp8,0,1.6201225916544597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,128,1,fp8,fp8,0,1.507695992787679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,0,1,float16,float16,0,8.292095820109049
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,0,1,fp8,fp8,0,14.797381083170572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,0,1,float16,fp8,0,17.540181477864582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,128,1,float16,float16,0,1.4081919987996419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,128,1,float16,fp8,0,1.4210453033447266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,128,1,fp8,fp8,0,1.2921280066172283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,0,1,fp8,fp8,0,7.684234619140625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,0,1,float16,fp8,0,8.605914433797201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,128,1,float16,float16,0,1.4172800381978352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,0,1,float16,float16,0,9.182218551635742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,128,1,float16,fp8,0,1.4296479225158691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,0,1,fp8,fp8,0,7.462970733642578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,0,1,float16,fp8,0,8.26047452290853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,128,1,fp8,fp8,0,1.3019253412882488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,128,1,float16,float16,0,1.4334400494893391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,0,1,float16,float16,0,8.228506724039713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,128,1,float16,fp8,0,1.4467040697733562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,128,1,fp8,fp8,0,1.3204586505889893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,0,1,fp8,fp8,0,7.471242904663086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,0,1,float16,fp8,0,8.093695958455404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,128,1,float16,float16,0,0.8691146373748779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,0,1,float16,float16,0,8.47438939412435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,128,1,float16,fp8,0,0.8908586502075195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,128,1,fp8,fp8,0,0.8365013599395752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,0,1,float16,float16,0,4.26801077524821
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,0,1,float16,fp8,0,8.34884770711263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,0,1,fp8,fp8,0,7.4892317454020185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,128,1,float16,float16,0,0.7896587053934733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,128,1,float16,fp8,0,0.794538656870524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,0,1,fp8,fp8,0,3.9720640182495117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,0,1,float16,fp8,0,4.374309221903483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,128,1,fp8,fp8,0,0.7307199637095133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,0,1,float16,float16,0,4.262768109639485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,128,1,float16,float16,0,0.7912586530049642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,128,1,float16,fp8,0,0.7974080244700114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,128,1,fp8,fp8,0,0.734282652537028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,0,1,fp8,fp8,0,3.862016042073568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,0,1,float16,fp8,0,4.164213180541992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,128,1,float16,float16,0,0.7981706460316976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,0,1,float16,float16,0,4.3069760004679365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,128,1,float16,fp8,0,0.805626630783081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,0,1,fp8,fp8,0,3.8681920369466147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,0,1,float16,fp8,0,4.205226580301921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,128,1,fp8,fp8,0,0.7432533105214437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,128,1,float16,float16,0,0.605402668317159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,0,1,float16,float16,0,4.175392150878906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,128,1,float16,fp8,0,0.6074399948120117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,128,1,fp8,fp8,0,0.5659519831339518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,0,1,float16,float16,0,2.356869379679362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,0,1,fp8,fp8,0,3.875685373942057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,0,1,float16,fp8,0,4.180538813273112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,128,1,float16,float16,0,0.6086026827494303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,0,1,float16,fp8,0,2.35642671585083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,128,1,float16,fp8,0,0.606714685757955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,0,1,fp8,fp8,0,2.1797919273376465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,128,1,fp8,fp8,0,0.5681546529134115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,0,1,float16,float16,0,2.343445301055908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,128,1,float16,float16,0,0.6078666845957438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,0,1,fp8,fp8,0,2.1791626612345376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,0,1,float16,fp8,0,2.344202677408854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,128,1,float16,fp8,0,0.6063413222630819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,128,1,fp8,fp8,0,0.5702613194783529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,0,1,float16,float16,0,2.3466506004333496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,128,1,float16,float16,0,0.6072266499201456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,0,1,float16,fp8,0,2.3459946314493814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,0,1,fp8,fp8,0,2.1801279385884604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,128,1,float16,fp8,0,0.6055200099945068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,128,1,fp8,fp8,0,0.5682719945907593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,0,1,float16,float16,0,2.348618666330973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,0,1,float16,fp8,0,2.3482240041097007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,0,1,fp8,fp8,0,2.181887944539388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,128,1,float16,float16,0,2.0137386322021484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,128,1,fp8,fp8,0,1.8410239219665527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,128,1,float16,fp8,0,2.0293173789978027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,128,1,float16,float16,0,2.0261279741923013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,0,1,float16,float16,0,9.37005360921224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,0,1,fp8,fp8,0,8.682624181111654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,0,1,float16,fp8,0,9.720528284708658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,128,1,float16,fp8,0,2.044981320699056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,128,1,fp8,fp8,0,1.8588053385416667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,0,1,float16,float16,0,9.595418930053711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,128,1,float16,float16,0,2.050325393676758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,128,1,float16,fp8,0,2.0696266492207847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,128,1,fp8,fp8,0,1.8894346555074055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,0,1,fp8,fp8,0,8.701973597208658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,0,1,float16,fp8,0,9.874890645345053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,128,1,float16,float16,0,1.1901333332061768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,0,1,float16,float16,0,10.085333506266275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,128,1,float16,fp8,0,1.2988266944885254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,128,1,fp8,fp8,0,1.1320693492889404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,0,1,float16,float16,0,5.658149083455403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,0,1,fp8,fp8,0,8.733941396077475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,0,1,float16,fp8,0,9.957013448079428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,128,1,float16,float16,0,1.0607306957244873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,128,1,float16,fp8,0,1.0696799755096436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,128,1,fp8,fp8,0,0.9732906818389893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,0,1,float16,fp8,0,5.121957461039226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,0,1,fp8,fp8,0,4.592954635620117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,128,1,float16,float16,0,1.0661653677622478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,0,1,float16,float16,0,4.986917177836101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,128,1,float16,fp8,0,1.1333386898040771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,0,1,fp8,fp8,0,4.430912017822266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,0,1,float16,fp8,0,4.789338747660319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,128,1,fp8,fp8,0,0.9820853074391683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,128,1,float16,float16,0,1.0780853430430095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,0,1,float16,float16,0,4.890111923217773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,128,1,float16,fp8,0,1.1465813318888347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,128,1,fp8,fp8,0,0.995306650797526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,0,1,fp8,fp8,0,4.439557393391927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,0,1,float16,fp8,0,4.818000157674153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,128,1,float16,float16,0,0.6562293370564779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,0,1,float16,float16,0,5.062639872233073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,128,1,float16,fp8,0,0.7065920035044352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,0,1,float16,float16,0,2.572271982828776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,128,1,fp8,fp8,0,0.6324479977289835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,0,1,fp8,fp8,0,4.454405466715495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,0,1,float16,fp8,0,4.926896095275879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,128,1,float16,float16,0,0.5959200064341227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,128,1,float16,fp8,0,0.5992960135142008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,0,1,fp8,fp8,0,2.3989866574605307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,0,1,float16,fp8,0,2.586842695871989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,128,1,fp8,fp8,0,0.5518720149993896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,0,1,float16,float16,0,2.4931467374165854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,128,1,float16,float16,0,0.5990613301595052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,128,1,float16,fp8,0,0.6042933464050293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,0,1,fp8,fp8,0,2.316469351450602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,0,1,float16,fp8,0,2.504800001780192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,128,1,fp8,fp8,0,0.5580373207728068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,128,1,float16,float16,0,0.6044319868087769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,0,1,float16,float16,0,2.4994400342305503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,128,1,float16,fp8,0,0.6132746537526449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,0,1,fp8,fp8,0,2.3205226262410483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,0,1,float16,fp8,0,2.5034079551696777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,128,1,fp8,fp8,0,0.5636959870656332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,128,1,float16,float16,0,0.4604053497314453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,0,1,float16,float16,0,2.5081653594970703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,128,1,float16,fp8,0,0.4609546661376953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,0,1,fp8,fp8,0,2.328437328338623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,0,1,float16,fp8,0,2.5840907096862793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,128,1,fp8,fp8,0,0.43062400817871094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,0,1,float16,float16,0,1.4573119481404622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,128,1,float16,float16,0,0.4580693244934082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,0,1,fp8,fp8,0,1.3510665893554688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,128,1,float16,fp8,0,0.4580746491750081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,0,1,float16,fp8,0,1.45797332127889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,0,1,float16,float16,0,1.44870392481486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,128,1,fp8,fp8,0,0.4323466618855794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,128,1,float16,float16,0,0.45943466822306317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,0,1,fp8,fp8,0,1.3474399248758953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,0,1,float16,fp8,0,1.4492425918579102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,128,1,float16,fp8,0,0.46036799748738605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,128,1,fp8,fp8,0,0.4303893248240153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,0,1,float16,float16,0,1.4502612749735515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,128,1,float16,float16,0,0.45894400278727215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,0,1,fp8,fp8,0,1.3480745951334636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,0,1,float16,fp8,0,1.4514026641845703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,128,1,float16,fp8,0,0.4609493414560954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,128,1,fp8,fp8,0,0.4308319886525472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,0,1,float16,float16,0,1.452250639597575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,0,1,float16,fp8,0,1.4537439346313477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,0,1,fp8,fp8,0,1.3487946192423503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,128,1,float16,float16,0,1.6738880475362141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,128,1,float16,fp8,0,1.6886132558186848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,128,1,fp8,fp8,0,1.5272852579752605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,0,1,float16,float16,0,6.892058690388997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,128,1,float16,float16,0,1.6839626630147297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,0,1,fp8,fp8,0,6.251007715861003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,0,1,float16,fp8,0,6.770250956217448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,128,1,float16,fp8,0,1.698954741160075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,128,1,fp8,fp8,0,1.5402026176452637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,128,1,float16,float16,0,1.7033227284749348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,0,1,float16,float16,0,6.796085357666016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,128,1,float16,fp8,0,1.7198400497436523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,0,1,float16,fp8,0,6.786314646402995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,0,1,fp8,fp8,0,6.263807932535808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,128,1,fp8,fp8,0,1.5643787384033203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,128,1,float16,float16,0,0.9930506547292074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,0,1,float16,float16,0,7.005903879801433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,128,1,float16,fp8,0,1.015887975692749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,128,1,fp8,fp8,0,0.9454879760742188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,0,1,fp8,fp8,0,6.288026809692383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,0,1,float16,float16,0,3.6329708099365234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,0,1,float16,fp8,0,7.04914665222168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,128,1,float16,float16,0,0.8859146436055502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,128,1,float16,fp8,0,0.8938186963399252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,0,1,fp8,fp8,0,3.3399572372436523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,128,1,fp8,fp8,0,0.8147839705149332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,0,1,float16,fp8,0,3.690554618835449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,0,1,float16,float16,0,3.671680132548014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,128,1,float16,float16,0,0.8897386391957601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,128,1,float16,fp8,0,0.898149331410726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,0,1,fp8,fp8,0,3.2057441075642905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,128,1,fp8,fp8,0,0.8201493422190348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,0,1,float16,fp8,0,3.6440534591674805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,0,1,float16,float16,0,3.501066525777181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,128,1,float16,float16,0,0.9241066773732504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,128,1,float16,fp8,0,0.9099733034769694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,0,1,fp8,fp8,0,3.212597211201986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,0,1,float16,fp8,0,3.4832159678141275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,128,1,fp8,fp8,0,0.8326026598612467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,128,1,float16,float16,0,0.5510826508204142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,0,1,float16,float16,0,3.4887892405192056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,128,1,float16,fp8,0,0.5641333262125651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,128,1,fp8,fp8,0,0.531221350034078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,0,1,float16,float16,0,1.8814667065938313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,0,1,fp8,fp8,0,3.2237278620402017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,0,1,float16,fp8,0,3.690826733907064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,128,1,float16,float16,0,0.5000906785329183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,0,1,float16,fp8,0,1.8967359860738118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,128,1,float16,fp8,0,0.5036746660868326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,0,1,fp8,fp8,0,1.7573439280192058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,128,1,fp8,fp8,0,0.46540268262227374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,0,1,float16,float16,0,1.8412106831868489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,128,1,float16,float16,0,0.5027413368225098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,0,1,float16,fp8,0,1.821407953898112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,128,1,float16,fp8,0,0.5070399840672811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,0,1,fp8,fp8,0,1.6905120213826497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,0,1,float16,float16,0,1.8232533137003581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,128,1,fp8,fp8,0,0.46805866559346515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,128,1,float16,float16,0,0.5072000026702881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,0,1,fp8,fp8,0,1.6928906440734863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,0,1,float16,fp8,0,1.827781359354655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,128,1,float16,fp8,0,0.5125600099563599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,128,1,fp8,fp8,0,0.47261865933736164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,0,1,float16,float16,0,1.8296799659729004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,128,1,float16,float16,0,0.3867306709289551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,128,1,float16,fp8,0,0.386624018351237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,0,1,fp8,fp8,0,1.698522726694743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,0,1,float16,float16,0,1.0869706471761067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,0,1,float16,fp8,0,1.83732271194458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,128,1,fp8,fp8,0,0.36183468500773114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,128,1,float16,float16,0,0.38433067003885907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,0,1,fp8,fp8,0,1.0061866442362468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,0,1,float16,fp8,0,1.0878400007883708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,128,1,float16,fp8,0,0.3843626578648885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,0,1,float16,float16,0,1.0790186723073323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,128,1,fp8,fp8,0,0.3612373272577922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,128,1,float16,float16,0,0.3853066762288411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,0,1,fp8,fp8,0,1.0040213267008464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,0,1,float16,fp8,0,1.078389326731364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,128,1,float16,fp8,0,0.3856373230616252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,128,1,fp8,fp8,0,0.3606559832890828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,0,1,float16,float16,0,1.0789173444112141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,128,1,float16,float16,0,0.3848106861114502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,0,1,float16,fp8,0,1.0798400243123372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,0,1,fp8,fp8,0,1.0061972935994465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,128,1,float16,fp8,0,0.3854186534881592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,0,1,float16,float16,0,1.0824159781138103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,128,1,fp8,fp8,0,0.3603359858194987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,0,1,fp8,fp8,0,1.0054826736450195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,0,1,float16,fp8,0,1.0821759700775146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,128,1,float16,float16,0,2.6415252685546875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,128,1,fp8,fp8,0,2.410970687866211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,128,1,float16,fp8,0,2.661402702331543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,128,1,float16,float16,0,2.6615947087605796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,0,1,float16,float16,0,9.512042363484701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,0,1,fp8,fp8,0,8.344149271647135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,128,1,float16,fp8,0,2.6817760467529297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,0,1,float16,fp8,0,9.064565022786459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,128,1,fp8,fp8,0,2.435983975728353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,0,1,float16,float16,0,9.063989639282227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,128,1,float16,float16,0,2.6967039108276367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,0,1,float16,fp8,0,9.874319712320963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,128,1,float16,fp8,0,2.716650644938151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,0,1,fp8,fp8,0,8.369232177734375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,128,1,fp8,fp8,0,2.480565388997396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,128,1,float16,float16,0,1.5181973775227864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,0,1,float16,float16,0,9.116554896036783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,128,1,float16,fp8,0,1.5507413546244304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,0,1,fp8,fp8,0,8.416112263997396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,0,1,float16,float16,0,5.067978541056315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,128,1,fp8,fp8,0,1.438576062520345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,0,1,float16,fp8,0,9.13540776570638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,128,1,float16,float16,0,1.3386400540669758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,128,1,float16,fp8,0,1.3544960021972656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,0,1,fp8,fp8,0,4.431872049967448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,0,1,float16,fp8,0,4.92304007212321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,0,1,float16,float16,0,4.564223925272624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,128,1,fp8,fp8,0,1.223189353942871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,128,1,float16,float16,0,1.34716796875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,128,1,float16,fp8,0,1.3591680526733398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,0,1,fp8,fp8,0,4.2128801345825195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,0,1,float16,fp8,0,4.699125289916992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,128,1,fp8,fp8,0,1.2330613136291504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,0,1,float16,float16,0,4.575509389241536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,128,1,float16,float16,0,1.3618666330973308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,128,1,float16,fp8,0,1.376405398050944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,0,1,fp8,fp8,0,4.223930676778157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,0,1,float16,fp8,0,4.781680107116699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,128,1,fp8,fp8,0,1.2512853145599365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,0,1,float16,float16,0,4.600528081258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,128,1,float16,float16,0,0.7972533702850342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,128,1,float16,fp8,0,0.8162986437479655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,0,1,float16,float16,0,2.4595840771993003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,128,1,fp8,fp8,0,0.7608533700307211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,0,1,fp8,fp8,0,4.2434508005778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,0,1,float16,fp8,0,4.690560022989909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,128,1,float16,float16,0,0.7124959627787272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,0,1,fp8,fp8,0,2.2818986574808755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,0,1,float16,fp8,0,2.471738656361898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,128,1,float16,fp8,0,0.7195040384928385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,128,1,fp8,fp8,0,0.6569066842397054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,0,1,float16,float16,0,2.39791472752889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,128,1,float16,float16,0,0.7167733510335287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,0,1,float16,fp8,0,2.3568320274353027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,0,1,fp8,fp8,0,2.175802707672119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,128,1,float16,fp8,0,0.7232639789581299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,128,1,fp8,fp8,0,0.6607679923375448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,0,1,float16,float16,0,2.390511989593506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,128,1,float16,float16,0,0.7234506607055664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,0,1,fp8,fp8,0,2.1798720359802246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,0,1,float16,fp8,0,2.3630240758260093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,128,1,float16,fp8,0,0.7322346369425455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,128,1,fp8,fp8,0,0.6688746611277262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,0,1,float16,float16,0,2.514143943786621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,128,1,float16,float16,0,0.4453013340632121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,128,1,float16,fp8,0,0.45584531625111896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,0,1,float16,float16,0,1.297749360402425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,0,1,fp8,fp8,0,2.18994140625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,0,1,float16,fp8,0,2.3920373916625977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,128,1,fp8,fp8,0,0.4276160001754761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,128,1,float16,float16,0,0.40628798802693683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,128,1,float16,fp8,0,0.4122453530629476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,0,1,fp8,fp8,0,1.214138666788737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,0,1,float16,fp8,0,1.3090133666992188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,0,1,float16,float16,0,1.2459466457366943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,128,1,fp8,fp8,0,0.37722134590148926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,128,1,float16,float16,0,0.4063093264897664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,0,1,fp8,fp8,0,1.1593386332194011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,0,1,float16,fp8,0,1.250928004582723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,128,1,float16,fp8,0,0.41040531794230145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,0,1,float16,float16,0,1.2517866293589275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,128,1,fp8,fp8,0,0.3792693217595418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,128,1,float16,float16,0,0.4106186628341675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,0,1,float16,fp8,0,1.2559733390808105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,0,1,fp8,fp8,0,1.1618613402048747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,128,1,float16,fp8,0,0.41325334707895917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,0,1,float16,float16,0,1.2565813064575195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,128,1,fp8,fp8,0,0.38227200508117676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,128,1,float16,float16,0,0.31416000922520954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,0,1,float16,fp8,0,1.2617066701253254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,0,1,float16,float16,0,0.7683306535085043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,0,1,fp8,fp8,0,1.1660479704538982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,128,1,float16,fp8,0,0.31431466341018677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,128,1,fp8,fp8,0,0.2937013308207194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,128,1,float16,float16,0,0.31249066193898517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,0,1,float16,fp8,0,0.7700693607330322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,0,1,fp8,fp8,0,0.710431973139445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,128,1,float16,fp8,0,0.313914676507314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,128,1,fp8,fp8,0,0.2932746609052022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,0,1,float16,float16,0,0.759493350982666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,0,1,float16,fp8,0,0.762330691019694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,128,1,float16,float16,0,0.31251200040181476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,0,1,fp8,fp8,0,0.7089013258616129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,0,1,float16,float16,0,0.7612746556599935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,128,1,float16,fp8,0,0.3144746621449788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,128,1,fp8,fp8,0,0.2933066685994466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,0,1,float16,fp8,0,0.7626026471455892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,128,1,float16,float16,0,0.31465067466100055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,0,1,fp8,fp8,0,0.7075573603312174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,0,1,float16,float16,0,0.7640319665273031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,128,1,float16,fp8,0,0.31362666686375934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,128,1,fp8,fp8,0,0.2938613295555115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,0,1,fp8,fp8,0,0.7091200351715088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,0,1,float16,fp8,0,0.765893300374349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,128,1,float16,float16,0,1.9583679835001628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,128,1,float16,fp8,0,1.9729599952697754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,128,1,fp8,fp8,0,1.7829279899597168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,0,1,float16,float16,0,5.650917053222656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,0,1,float16,fp8,0,5.494293212890625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,128,1,float16,float16,0,1.970677375793457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,0,1,fp8,fp8,0,5.046773274739583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,128,1,fp8,fp8,0,1.7989013989766438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,128,1,float16,fp8,0,1.9865387280782063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,0,1,float16,float16,0,5.665738423665364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,128,1,float16,float16,0,1.9951146443684895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,0,1,float16,fp8,0,5.513050715128581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,128,1,float16,fp8,0,2.011727968851725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,0,1,fp8,fp8,0,5.064789454142253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,128,1,fp8,fp8,0,1.8316532770792644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,0,1,float16,float16,0,5.7247358957926435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,128,1,float16,float16,0,1.1399733225504558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,128,1,float16,fp8,0,1.1637972990671794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,0,1,float16,fp8,0,5.55190912882487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,0,1,fp8,fp8,0,5.097200075785319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,128,1,fp8,fp8,0,1.0773706436157227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,0,1,float16,float16,0,3.1736373901367188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,128,1,float16,float16,0,1.007973353068034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,128,1,float16,fp8,0,1.0164106686909993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,0,1,fp8,fp8,0,2.730410575866699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,0,1,float16,fp8,0,3.076565424601237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,0,1,float16,float16,0,3.0570720036824546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,128,1,fp8,fp8,0,0.9201866785685221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,128,1,float16,float16,0,1.013749361038208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,0,1,fp8,fp8,0,2.57098658879598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,0,1,float16,fp8,0,2.796991984049479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,128,1,float16,fp8,0,1.0240533351898193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,0,1,float16,float16,0,2.796874682108561
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,128,1,fp8,fp8,0,0.9275200366973877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,128,1,float16,float16,0,1.0258293151855469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,0,1,fp8,fp8,0,2.5772107442220054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,0,1,float16,fp8,0,2.8139947255452475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,128,1,float16,fp8,0,1.0358239809672039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,0,1,float16,float16,0,2.815648078918457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,128,1,fp8,fp8,0,0.9417920112609863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,128,1,float16,float16,0,0.6032266616821289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,128,1,float16,fp8,0,0.6159946521123251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,0,1,float16,float16,0,1.5225812594095867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,0,1,float16,fp8,0,2.8249333699544272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,0,1,fp8,fp8,0,2.5918453534444175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,128,1,fp8,fp8,0,0.5754506587982178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,128,1,float16,float16,0,0.5400533278783163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,0,1,float16,fp8,0,1.5373226801554363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,128,1,float16,fp8,0,0.5453066825866699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,0,1,fp8,fp8,0,1.4190559387207031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,0,1,float16,float16,0,1.4477386474609375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,128,1,fp8,fp8,0,0.4973440170288086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,128,1,float16,float16,0,0.5428160031636556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,0,1,fp8,fp8,0,1.3406933148701985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,0,1,float16,fp8,0,1.4533227284749348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,128,1,float16,fp8,0,0.5486613512039185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,0,1,float16,float16,0,1.454309304555257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,128,1,fp8,fp8,0,0.5023786624272665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,128,1,float16,float16,0,0.5485759973526001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,0,1,float16,fp8,0,1.4589227040608723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,0,1,fp8,fp8,0,1.3441707293192546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,128,1,float16,fp8,0,0.5540106693903605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,0,1,float16,float16,0,1.461583932240804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,128,1,fp8,fp8,0,0.5071093241373698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,128,1,float16,float16,0,0.33956801891326904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,0,1,float16,fp8,0,1.4699093500773113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,0,1,float16,float16,0,0.8169600168863932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,128,1,float16,fp8,0,0.34727998574574787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,0,1,fp8,fp8,0,1.3505066235860188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,128,1,fp8,fp8,0,0.32767999172210693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,128,1,float16,float16,0,0.30562132596969604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,0,1,fp8,fp8,0,0.765343983968099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,0,1,float16,fp8,0,0.8260160287221273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,128,1,float16,fp8,0,0.30802132685979206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,0,1,float16,float16,0,0.7773760159810384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,128,1,fp8,fp8,0,0.28573334217071533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,128,1,float16,float16,0,0.30798933903376263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,0,1,float16,fp8,0,0.7804053624471029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,0,1,fp8,fp8,0,0.7249013582865397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,0,1,float16,float16,0,0.7815252939860026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,128,1,float16,fp8,0,0.31006399790445965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,128,1,fp8,fp8,0,0.2876266638437907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,128,1,float16,float16,0,0.3120746612548828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,0,1,float16,fp8,0,0.7839999993642172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,0,1,fp8,fp8,0,0.7281813621520996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,0,1,float16,float16,0,0.7861493428548177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,128,1,float16,fp8,0,0.3160426616668701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,128,1,fp8,fp8,0,0.29190399249394733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,128,1,float16,float16,0,0.2404586672782898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,0,1,float16,float16,0,0.500485340754191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,0,1,float16,fp8,0,0.791274627049764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,0,1,fp8,fp8,0,0.7303786277770996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,128,1,float16,fp8,0,0.2389706571896871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,128,1,fp8,fp8,0,0.22324266036351523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,128,1,float16,float16,0,0.23496532440185547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,0,1,float16,fp8,0,0.5008906523386637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,0,1,fp8,fp8,0,0.4617813428243001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,128,1,float16,fp8,0,0.23749866088231406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,128,1,fp8,fp8,0,0.22362132867177328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,0,1,float16,float16,0,0.4963039954503377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,128,1,float16,float16,0,0.23640000820159912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,0,1,fp8,fp8,0,0.46190933386484784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,0,1,float16,fp8,0,0.49581865469614667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,128,1,float16,fp8,0,0.23759466409683228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,128,1,fp8,fp8,0,0.22403200467427573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,0,1,float16,float16,0,0.497599999109904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,128,1,float16,float16,0,0.23678400119145712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,0,1,fp8,fp8,0,0.4618240197499593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,0,1,float16,fp8,0,0.4970719814300537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,128,1,float16,fp8,0,0.237445334593455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,128,1,fp8,fp8,0,0.22259199619293213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,0,1,float16,float16,0,0.49688533941904706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,0,1,fp8,fp8,0,0.4610453446706136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,0,1,float16,fp8,0,0.49692265192667645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,128,1,float16,float16,0,2.60044272740682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,128,1,float16,fp8,0,2.6170719464619956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,128,1,fp8,fp8,0,2.3694666226704917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,0,1,float16,float16,0,5.558986663818359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,128,1,float16,float16,0,2.619642734527588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,0,1,float16,fp8,0,5.5763200124104815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,0,1,fp8,fp8,0,5.11030387878418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,128,1,float16,fp8,0,2.6404800415039062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,128,1,fp8,fp8,0,2.3923999468485513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,0,1,float16,float16,0,5.586309432983398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,128,1,float16,float16,0,2.657082716623942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,0,1,float16,fp8,0,5.609071731567383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,0,1,fp8,fp8,0,5.1346079508463545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,128,1,float16,fp8,0,2.6762186686197915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,0,1,float16,float16,0,5.6330610911051435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,128,1,fp8,fp8,0,2.436741352081299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,128,1,float16,float16,0,1.481397310892741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,0,1,float16,float16,0,3.006858507792155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,128,1,float16,fp8,0,1.5065919558207195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,0,1,float16,fp8,0,5.652581532796224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,128,1,fp8,fp8,0,1.397157351175944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,0,1,fp8,fp8,0,5.176901181538899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,128,1,float16,float16,0,1.2995519638061523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,0,1,float16,fp8,0,3.0322612126668296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,0,1,fp8,fp8,0,2.784261385599772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,128,1,float16,fp8,0,1.3127626578013103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,0,1,float16,float16,0,2.7928425470987954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,128,1,fp8,fp8,0,1.1840533415476482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,128,1,float16,float16,0,1.3093600273132324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,0,1,float16,fp8,0,2.8055572509765625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,0,1,fp8,fp8,0,2.5659093856811523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,0,1,float16,float16,0,2.8219734827677407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,128,1,float16,fp8,0,1.3212693532307942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,128,1,fp8,fp8,0,1.193552017211914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,128,1,float16,float16,0,1.3249599933624268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,0,1,float16,fp8,0,2.8163038889567056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,0,1,fp8,fp8,0,2.5749120712280273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,128,1,float16,fp8,0,1.3397706349690754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,0,1,float16,float16,0,2.8268000284830728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,128,1,fp8,fp8,0,1.2110079924265544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,128,1,float16,float16,0,0.764474630355835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,128,1,float16,fp8,0,0.7796533107757568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,0,1,float16,fp8,0,2.838864008585612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,0,1,float16,float16,0,1.5371947288513184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,0,1,fp8,fp8,0,2.595418612162272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,128,1,fp8,fp8,0,0.7238667011260986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,128,1,float16,float16,0,0.678272008895874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,0,1,fp8,fp8,0,1.4268266359965007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,0,1,float16,fp8,0,1.5550986925760906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,128,1,float16,fp8,0,0.6835467020670573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,0,1,float16,float16,0,1.4364585876464844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,128,1,fp8,fp8,0,0.6198453505833944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,128,1,float16,float16,0,0.6806986331939697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,0,1,float16,fp8,0,1.4405439694722493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,0,1,fp8,fp8,0,1.32205335299174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,0,1,float16,float16,0,1.441904067993164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,128,1,fp8,fp8,0,0.6247733434041342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,128,1,float16,fp8,0,0.6872266928354899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,0,1,float16,fp8,0,1.449402650197347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,128,1,float16,float16,0,0.6884693304697672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,0,1,fp8,fp8,0,1.327840010325114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,0,1,float16,float16,0,1.4531413714090984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,128,1,fp8,fp8,0,0.6345653136571249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,128,1,float16,fp8,0,0.696341355641683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,128,1,float16,float16,0,0.40855467319488525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,0,1,float16,float16,0,0.8078400293986002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,128,1,float16,fp8,0,0.4187839825948079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,0,1,float16,fp8,0,1.460213343302409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,0,1,fp8,fp8,0,1.3373279571533203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,128,1,fp8,fp8,0,0.3905653158823649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,128,1,float16,float16,0,0.365013321240743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,0,1,float16,fp8,0,0.8163466453552246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,0,1,fp8,fp8,0,0.752234697341919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,0,1,float16,float16,0,0.7545333703358968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,128,1,float16,fp8,0,0.3668160041173299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,128,1,fp8,fp8,0,0.33885331948598224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,128,1,float16,float16,0,0.36692798137664795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,0,1,float16,fp8,0,0.7578186988830566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,0,1,fp8,fp8,0,0.7003359794616699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,128,1,float16,fp8,0,0.37004268169403076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,0,1,float16,float16,0,0.7581973075866699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,128,1,fp8,fp8,0,0.34063998858133954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,128,1,float16,float16,0,0.37055468559265137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,0,1,float16,fp8,0,0.761023998260498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,0,1,fp8,fp8,0,0.7039039929707845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,128,1,float16,fp8,0,0.37459735075632733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,128,1,fp8,fp8,0,0.34598398208618164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,0,1,float16,float16,0,0.7640906969706217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,128,1,float16,float16,0,0.232314666112264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,0,1,float16,fp8,0,0.768069346745809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,0,1,fp8,fp8,0,0.7078239917755127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,0,1,float16,float16,0,0.4434826771418254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,128,1,float16,fp8,0,0.23857067028681436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,128,1,fp8,fp8,0,0.22432533899943033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,128,1,float16,float16,0,0.20638400316238403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,0,1,float16,fp8,0,0.4495733181635539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,0,1,fp8,fp8,0,0.4177653392155965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,128,1,float16,fp8,0,0.20771199464797974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,128,1,fp8,fp8,0,0.19732266664505005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,0,1,float16,float16,0,0.41333333651224774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,128,1,float16,float16,0,0.20850133895874023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,0,1,fp8,fp8,0,0.38819201787312824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,0,1,float16,fp8,0,0.41659732659657794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,128,1,fp8,fp8,0,0.1976319948832194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,128,1,float16,fp8,0,0.21059733629226685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,0,1,float16,float16,0,0.4168586730957031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,128,1,float16,float16,0,0.21175465981165567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,0,1,fp8,fp8,0,0.39107731978098553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,0,1,float16,fp8,0,0.4180426597595215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,128,1,float16,fp8,0,0.21380800008773804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,128,1,fp8,fp8,0,0.19954133033752441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,0,1,float16,float16,0,0.41954131921132404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,128,1,float16,float16,0,0.16761600971221924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,0,1,fp8,fp8,0,0.3926080067952474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,0,1,float16,fp8,0,0.4203146696090698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,0,1,float16,float16,0,0.28521599372227985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,128,1,float16,fp8,0,0.1666826605796814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,128,1,fp8,fp8,0,0.15784533818562826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,128,1,float16,float16,0,0.1645813286304474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,0,1,fp8,fp8,0,0.26412800947825116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,0,1,float16,fp8,0,0.2853440046310425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,128,1,fp8,fp8,0,0.15436266859372458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,128,1,float16,fp8,0,0.16457066933314005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,0,1,float16,float16,0,0.28169600168863934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,128,1,float16,float16,0,0.16476800044377646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,0,1,fp8,fp8,0,0.2624053359031677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,0,1,float16,fp8,0,0.2821600039800008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,0,1,float16,float16,0,0.28250133991241455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,128,1,float16,fp8,0,0.16484799981117249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,128,1,fp8,fp8,0,0.1544373333454132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,0,1,fp8,fp8,0,0.2629493276278178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,0,1,float16,fp8,0,0.2828853329022725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,128,1,float16,float16,0,0.1646986703077952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,0,1,float16,float16,0,0.28068800767262775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,128,1,float16,fp8,0,0.1649066706498464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,128,1,fp8,fp8,0,0.15477866927782694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,0,1,float16,fp8,0,0.282645324865977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,0,1,fp8,fp8,0,0.261135995388031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,128,1,float16,float16,0,1.9251093864440918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,128,1,fp8,fp8,0,1.7517493565877278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,128,1,float16,fp8,0,1.9404746691385906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,0,1,float16,float16,0,3.5036214192708335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,128,1,float16,float16,0,1.9393653869628906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,0,1,fp8,fp8,0,3.210810661315918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,0,1,float16,fp8,0,3.519455909729004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,128,1,float16,fp8,0,1.9554187456766765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,0,1,float16,float16,0,3.5212694803873696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,128,1,fp8,fp8,0,1.7704799969991047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,0,1,float16,fp8,0,3.5348374048868814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,128,1,float16,float16,0,1.965999921162923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,0,1,fp8,fp8,0,3.2256905237833657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,0,1,float16,float16,0,3.55020809173584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,128,1,fp8,fp8,0,1.8006505966186523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,128,1,float16,fp8,0,1.9827893575032551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,128,1,float16,float16,0,1.112229347229004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,0,1,float16,float16,0,1.9248533248901367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,0,1,float16,fp8,0,3.56661319732666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,128,1,float16,fp8,0,1.133344014485677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,0,1,fp8,fp8,0,3.257685343424479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,128,1,fp8,fp8,0,1.0493333339691162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,0,1,float16,fp8,0,1.9452266693115234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,128,1,float16,float16,0,0.9787093003590902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,0,1,fp8,fp8,0,1.788629372914632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,0,1,float16,float16,0,1.7773812611897786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,128,1,fp8,fp8,0,0.891599973042806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,128,1,float16,fp8,0,0.988159974416097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,128,1,float16,float16,0,0.9859680334726969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,0,1,float16,fp8,0,1.7871252695719402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,0,1,fp8,fp8,0,1.6289226214090984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,0,1,float16,float16,0,1.7843626340230305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,128,1,fp8,fp8,0,0.8987360000610352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,128,1,float16,fp8,0,0.9945973555246989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,128,1,float16,float16,0,0.997978687286377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,0,1,float16,fp8,0,1.793717384338379
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,0,1,fp8,fp8,0,1.6367252667744954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,0,1,float16,float16,0,1.7986666361490886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,128,1,float16,fp8,0,1.0089013576507568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,128,1,fp8,fp8,0,0.9135626951853434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,128,1,float16,float16,0,0.5781973203023275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,0,1,float16,fp8,0,1.809125264485677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,0,1,float16,float16,0,0.9934026400248209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,0,1,fp8,fp8,0,1.6498986879984539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,128,1,float16,fp8,0,0.5910613139470419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,128,1,fp8,fp8,0,0.548245350519816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,0,1,float16,fp8,0,1.0045173168182373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,0,1,fp8,fp8,0,0.9264106750488281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,128,1,float16,float16,0,0.5128213167190552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,128,1,fp8,fp8,0,0.47018667062123615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,128,1,float16,fp8,0,0.5162186622619629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,0,1,float16,float16,0,0.9197973410288492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,0,1,fp8,fp8,0,0.8471360206604004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,128,1,float16,float16,0,0.5150239864985148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,0,1,float16,fp8,0,0.9246986707051595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,128,1,float16,fp8,0,0.5193066596984863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,128,1,fp8,fp8,0,0.473471999168396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,0,1,float16,float16,0,0.9247146447499593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,0,1,fp8,fp8,0,0.8507200082143148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,0,1,float16,fp8,0,0.9283413092295328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,128,1,float16,float16,0,0.5220640103022257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,128,1,fp8,fp8,0,0.4797439972559611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,128,1,float16,fp8,0,0.5269920031229655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,0,1,float16,float16,0,0.9315839608510336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,128,1,float16,float16,0,0.31117866436640423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,0,1,fp8,fp8,0,0.8578666845957438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,0,1,float16,fp8,0,0.9370986620585123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,0,1,float16,float16,0,0.5279680093129476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,128,1,float16,fp8,0,0.3185013333956401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,128,1,fp8,fp8,0,0.2975520094235738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,128,1,float16,float16,0,0.275765339533488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,0,1,float16,fp8,0,0.5357066790262858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,0,1,fp8,fp8,0,0.4947520097096761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,0,1,float16,float16,0,0.4877599875132243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,128,1,float16,fp8,0,0.2773279945055644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,128,1,fp8,fp8,0,0.25760533412297565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,0,1,fp8,fp8,0,0.45510931809743244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,0,1,float16,fp8,0,0.4896053473154704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,128,1,float16,float16,0,0.277621328830719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,0,1,float16,float16,0,0.49163198471069336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,128,1,fp8,fp8,0,0.2601813276608785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,128,1,float16,fp8,0,0.28255999088287354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,0,1,float16,fp8,0,0.493557333946228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,128,1,float16,float16,0,0.28141866127649945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,0,1,fp8,fp8,0,0.4575999975204468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,0,1,float16,float16,0,0.49665598074595135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,128,1,float16,fp8,0,0.2861226598421733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,128,1,fp8,fp8,0,0.26338666677474976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,128,1,float16,float16,0,0.17706133921941122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,0,1,float16,fp8,0,0.49960533777872723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,0,1,fp8,fp8,0,0.4617066780726115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,0,1,float16,float16,0,0.29606399933497113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,128,1,float16,fp8,0,0.18145600954691568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,128,1,fp8,fp8,0,0.17283733685811362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,0,1,float16,fp8,0,0.3011893431345622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,128,1,float16,float16,0,0.15659733613332114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,0,1,fp8,fp8,0,0.28068800767262775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,0,1,float16,float16,0,0.27382399638493854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,128,1,float16,fp8,0,0.15680000185966492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,128,1,fp8,fp8,0,0.14623467127482095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,0,1,float16,fp8,0,0.27451199293136597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,0,1,fp8,fp8,0,0.25299733877182007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,128,1,float16,float16,0,0.1568106710910797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,0,1,float16,float16,0,0.2737119992574056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,128,1,float16,fp8,0,0.16057599584261575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,128,1,fp8,fp8,0,0.15018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,0,1,float16,fp8,0,0.27588800589243573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,128,1,float16,float16,0,0.15865600109100342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,0,1,fp8,fp8,0,0.2550666729609172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,0,1,float16,float16,0,0.2754826744397481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,128,1,float16,fp8,0,0.16210666298866272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,128,1,fp8,fp8,0,0.15330132842063904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,0,1,float16,fp8,0,0.27695467074712116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,128,1,float16,float16,0,0.12618666887283325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,0,1,fp8,fp8,0,0.26128000020980835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,0,1,float16,float16,0,0.19660800695419312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,128,1,float16,fp8,0,0.12557866175969443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,128,1,fp8,fp8,0,0.11963199575742085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,0,1,float16,fp8,0,0.19702933231989542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,0,1,fp8,fp8,0,0.18346132834752402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,128,1,float16,float16,0,0.12495999534924825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,0,1,float16,float16,0,0.19449067115783691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,128,1,float16,fp8,0,0.12387733658154805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,128,1,fp8,fp8,0,0.11779733498891194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,0,1,float16,fp8,0,0.19353065888086954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,0,1,fp8,fp8,0,0.1813546617825826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,128,1,float16,float16,0,0.12370666861534119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,0,1,float16,float16,0,0.193338672320048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,128,1,float16,fp8,0,0.12567999958992004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,128,1,fp8,fp8,0,0.1178559958934784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,0,1,float16,fp8,0,0.19498666127522787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,128,1,float16,float16,0,0.12556800246238708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,0,1,fp8,fp8,0,0.18080000082651773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,0,1,float16,float16,0,0.19502399365107217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,128,1,float16,fp8,0,0.12385599811871846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,128,1,fp8,fp8,0,0.11744532982508342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,0,1,float16,fp8,0,0.19381866852442423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,0,1,fp8,fp8,0,0.18120000759760538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,128,1,fp8,fp8,0,2.3246560096740723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,128,1,float16,float16,0,2.6232213973999023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,128,1,float16,fp8,0,2.6170934041341147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,0,1,float16,float16,0,3.8606185913085938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,128,1,float16,float16,0,2.6313279469807944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,0,1,fp8,fp8,0,3.4666293462117515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,0,1,float16,fp8,0,3.8532800674438477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,128,1,float16,fp8,0,2.634559949239095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,128,1,fp8,fp8,0,2.34878937403361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,0,1,float16,float16,0,3.8702561060587564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,128,1,float16,float16,0,2.7322187423706055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,0,1,fp8,fp8,0,3.4909547170003257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,0,1,float16,fp8,0,3.872394561767578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,0,1,float16,float16,0,3.9793545405069985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,128,1,fp8,fp8,0,2.387183984120687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,128,1,float16,fp8,0,2.719829241434733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,128,1,float16,float16,0,1.4673120180765789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,0,1,float16,fp8,0,3.9685494105021157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,0,1,float16,float16,0,2.1071573893229165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,128,1,float16,fp8,0,1.4860000610351562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,128,1,fp8,fp8,0,1.37827730178833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,0,1,fp8,fp8,0,3.528010686238607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,0,1,float16,fp8,0,2.1260533332824707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,128,1,float16,float16,0,1.2841440041859944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,0,1,fp8,fp8,0,1.9576053619384766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,0,1,float16,float16,0,1.905098597208659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,128,1,float16,fp8,0,1.2932960192362468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,128,1,fp8,fp8,0,1.1624639828999836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,0,1,fp8,fp8,0,1.7386186917622883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,0,1,float16,fp8,0,1.9177066485087078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,128,1,float16,float16,0,1.2931146621704102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,0,1,float16,float16,0,1.919589360555013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,128,1,fp8,fp8,0,1.174837350845337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,128,1,float16,fp8,0,1.3036959966023762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,0,1,float16,fp8,0,1.9279786745707195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,0,1,fp8,fp8,0,1.750933329264323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,128,1,float16,float16,0,1.3095146814982097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,0,1,float16,float16,0,1.9366505940755208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,128,1,float16,fp8,0,1.3209546407063801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,128,1,fp8,fp8,0,1.1935839653015137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,128,1,float16,float16,0,0.7468852996826172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,0,1,float16,fp8,0,1.949061393737793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,0,1,float16,float16,0,1.0736266771952312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,128,1,float16,fp8,0,0.7616799672444662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,0,1,fp8,fp8,0,1.770037333170573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,128,1,fp8,fp8,0,0.7049919764200846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,128,1,float16,float16,0,0.6601866483688354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,0,1,float16,fp8,0,1.0884160200754802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,0,1,fp8,fp8,0,0.9997119903564453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,0,1,float16,float16,0,0.9761599699656168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,128,1,float16,fp8,0,0.6653333504994711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,128,1,fp8,fp8,0,0.5994826555252075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,0,1,float16,fp8,0,0.9825119972229004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,128,1,float16,float16,0,0.6649920145670573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,0,1,fp8,fp8,0,0.8935519854227701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,0,1,float16,float16,0,0.9815893173217773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,128,1,float16,fp8,0,0.6705280145009359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,128,1,fp8,fp8,0,0.6059146722157797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,0,1,float16,fp8,0,0.9892053604125977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,128,1,float16,float16,0,0.6712053616841634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,0,1,fp8,fp8,0,0.898570696512858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,0,1,float16,float16,0,0.989840030670166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,128,1,float16,fp8,0,0.6799680391947428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,128,1,fp8,fp8,0,0.6140106519063314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,128,1,float16,float16,0,0.39211201667785645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,0,1,float16,fp8,0,0.9970239798227946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,0,1,float16,float16,0,0.5605813264846802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,0,1,fp8,fp8,0,0.9077813625335693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,128,1,float16,fp8,0,0.40064533551534015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,128,1,fp8,fp8,0,0.3721119960149129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,128,1,float16,float16,0,0.34540800253550213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,0,1,fp8,fp8,0,0.5237493515014648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,0,1,float16,fp8,0,0.5690240065256754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,0,1,float16,float16,0,0.508784015973409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,128,1,fp8,fp8,0,0.32037333647410077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,128,1,float16,fp8,0,0.34779198964436847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,0,1,float16,fp8,0,0.5113173325856527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,0,1,fp8,fp8,0,0.47193066279093426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,128,1,float16,float16,0,0.34938665231068927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,0,1,float16,float16,0,0.5130720138549805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,128,1,float16,fp8,0,0.352234681447347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,128,1,fp8,fp8,0,0.3226986726125081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,0,1,float16,fp8,0,0.5154026746749878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,128,1,float16,float16,0,0.35469333330790204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,0,1,fp8,fp8,0,0.47414398193359375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,0,1,float16,float16,0,0.5180480082829794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,128,1,float16,fp8,0,0.35622934500376385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,128,1,fp8,fp8,0,0.32793599367141724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,0,1,float16,fp8,0,0.5225066741307577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,128,1,float16,float16,0,0.2141759991645813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,0,1,float16,float16,0,0.3025173346201579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,0,1,fp8,fp8,0,0.47815465927124023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,128,1,float16,fp8,0,0.21906665960947672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,128,1,fp8,fp8,0,0.20594666401545206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,0,1,float16,fp8,0,0.3073813319206238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,0,1,fp8,fp8,0,0.28649065891901654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,128,1,float16,float16,0,0.18574933211008707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,0,1,float16,float16,0,0.2725813388824463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,128,1,float16,fp8,0,0.18779732783635458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,128,1,fp8,fp8,0,0.1771519978841146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,0,1,float16,fp8,0,0.2740373412768046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,0,1,fp8,fp8,0,0.25762667258580524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,128,1,float16,float16,0,0.187226672967275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,0,1,float16,float16,0,0.2744213342666626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,128,1,float16,fp8,0,0.18924800554911295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,128,1,fp8,fp8,0,0.17886932690938315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,0,1,float16,fp8,0,0.27634666363398236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,0,1,fp8,fp8,0,0.26043200492858887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,128,1,float16,float16,0,0.19091200828552246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,128,1,float16,fp8,0,0.19324266910552979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,0,1,float16,float16,0,0.27743999163309735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,128,1,fp8,fp8,0,0.18138132492701212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,128,1,float16,float16,0,0.12422399719556172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,0,1,float16,fp8,0,0.28040534257888794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,0,1,fp8,fp8,0,0.26200000445048016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,0,1,float16,float16,0,0.17473600308100382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,128,1,float16,fp8,0,0.12783466776212057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,128,1,fp8,fp8,0,0.12273066242535909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,0,1,float16,fp8,0,0.1766080061594645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,128,1,float16,float16,0,0.11152000228563945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,0,1,fp8,fp8,0,0.16699200868606567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,128,1,fp8,fp8,0,0.10166933139165242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,128,1,float16,fp8,0,0.11180800199508667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,0,1,float16,float16,0,0.1606880029042562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,128,1,float16,float16,0,0.11158399780591328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,0,1,fp8,fp8,0,0.14797332882881165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,0,1,float16,fp8,0,0.16271467010180155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,128,1,float16,fp8,0,0.11136533816655476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,128,1,fp8,fp8,0,0.10331733028093974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,0,1,float16,float16,0,0.16013333201408386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,128,1,float16,float16,0,0.11170666416486104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,0,1,float16,fp8,0,0.1602079967657725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,0,1,fp8,fp8,0,0.14822933077812195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,128,1,fp8,fp8,0,0.10341333349545796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,128,1,float16,fp8,0,0.11249066392580669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,0,1,float16,float16,0,0.16239999731381735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,128,1,float16,float16,0,0.09128533800443013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,0,1,float16,fp8,0,0.16294399897257486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,0,1,fp8,fp8,0,0.14834133783976236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,0,1,float16,float16,0,0.12145599722862244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,128,1,float16,fp8,0,0.08923199772834778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,128,1,fp8,fp8,0,0.08684266606966655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,0,1,float16,fp8,0,0.12097600102424622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,128,1,float16,float16,0,0.08937600255012512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,0,1,fp8,fp8,0,0.11354133486747742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,128,1,float16,fp8,0,0.08905067046483357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,128,1,fp8,fp8,0,0.08463999629020691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,0,1,float16,float16,0,0.12016533811887105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,0,1,float16,fp8,0,0.12054399649302165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,128,1,float16,float16,0,0.088837335507075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,0,1,fp8,fp8,0,0.11217600107192993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,128,1,float16,fp8,0,0.09079999725023906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,128,1,fp8,fp8,0,0.08452266454696655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,0,1,float16,float16,0,0.12164800365765889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,0,1,float16,fp8,0,0.12161067128181458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,128,1,float16,float16,0,0.09066133697827657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,0,1,fp8,fp8,0,0.11369599898656209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,128,1,float16,fp8,0,0.09061866998672485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,128,1,fp8,fp8,0,0.08517332871754964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,0,1,float16,float16,0,0.12187199791272481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,0,1,float16,fp8,0,0.12172800302505493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,0,1,fp8,fp8,0,0.11334400375684102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,128,1,float16,float16,0,1.9313119252522786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,128,1,float16,fp8,0,1.9383145968119304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,0,1,float16,float16,0,2.5414346059163413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,128,1,fp8,fp8,0,1.7349653244018555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,0,1,float16,fp8,0,2.5469706853230796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,0,1,fp8,fp8,0,2.296346664428711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,128,1,float16,float16,0,1.9447840054829915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,0,1,float16,float16,0,2.561562697092692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,128,1,fp8,fp8,0,1.7474133173624675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,128,1,float16,fp8,0,1.9517653783162434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,0,1,float16,fp8,0,2.5614399909973145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,0,1,fp8,fp8,0,2.3094080289204917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,128,1,float16,float16,0,2.006927967071533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,0,1,float16,float16,0,2.62390931447347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,128,1,float16,fp8,0,1.9858400026957195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,128,1,fp8,fp8,0,1.777978738149007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,128,1,float16,float16,0,1.1031520366668701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,0,1,float16,fp8,0,2.599642594655355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,0,1,fp8,fp8,0,2.3390026092529297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,128,1,float16,fp8,0,1.11845866839091
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,0,1,float16,float16,0,1.4255785942077637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,128,1,fp8,fp8,0,1.0350933074951172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,128,1,float16,float16,0,0.9644959767659506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,0,1,float16,fp8,0,1.4396905899047852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,0,1,fp8,fp8,0,1.3207306861877441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,0,1,float16,float16,0,1.2751253445943196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,128,1,fp8,fp8,0,0.8769813378651937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,128,1,float16,fp8,0,0.9737706979115804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,0,1,float16,fp8,0,1.2823786735534668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,0,1,fp8,fp8,0,1.1596746444702148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,128,1,float16,float16,0,0.9704960187276205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,0,1,float16,float16,0,1.2808053493499756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,128,1,float16,fp8,0,0.9805066585540771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,128,1,fp8,fp8,0,0.8835253715515137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,0,1,float16,fp8,0,1.2890933354695637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,0,1,fp8,fp8,0,1.1660479704538982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,128,1,float16,float16,0,0.984666665395101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,0,1,float16,float16,0,1.29530668258667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,128,1,float16,fp8,0,0.9953813552856445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,128,1,fp8,fp8,0,0.8982666333516439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,0,1,float16,fp8,0,1.3058186372121174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,128,1,float16,float16,0,0.5659146706263224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,0,1,fp8,fp8,0,1.1820212999979656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,0,1,float16,float16,0,0.7286453247070312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,128,1,float16,fp8,0,0.5746399958928426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,128,1,fp8,fp8,0,0.5332586765289307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,0,1,float16,fp8,0,0.7403039932250977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,0,1,fp8,fp8,0,0.6808373133341471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,128,1,float16,float16,0,0.49728532632191974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,0,1,float16,float16,0,0.655290683110555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,128,1,fp8,fp8,0,0.4545919895172119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,128,1,float16,fp8,0,0.5016266504923502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,128,1,float16,float16,0,0.5006773471832275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,0,1,float16,fp8,0,0.6599253416061401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,0,1,fp8,fp8,0,0.6004480123519897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,128,1,float16,fp8,0,0.5047306617101034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,128,1,fp8,fp8,0,0.458346684773763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,0,1,float16,float16,0,0.660149335861206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,0,1,fp8,fp8,0,0.6032106479008993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,128,1,float16,float16,0,0.5069760084152222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,0,1,float16,fp8,0,0.6629226605097452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,128,1,fp8,fp8,0,0.4662453333536784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,128,1,float16,fp8,0,0.5127626657485962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,0,1,float16,float16,0,0.6677599747975668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,128,1,float16,float16,0,0.29837334156036377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,0,1,fp8,fp8,0,0.6112480163574219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,0,1,float16,fp8,0,0.6727093060811361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,0,1,float16,float16,0,0.3859039942423503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,128,1,float16,fp8,0,0.30449599027633667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,128,1,fp8,fp8,0,0.2834773262341817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,0,1,float16,fp8,0,0.39181331793467206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,128,1,float16,float16,0,0.2597973346710205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,0,1,fp8,fp8,0,0.3605333169301351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,128,1,float16,fp8,0,0.26267733176549274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,0,1,float16,float16,0,0.3421440124511719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,128,1,fp8,fp8,0,0.24326932430267334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,0,1,fp8,fp8,0,0.31890666484832764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,128,1,float16,float16,0,0.2620533307393392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,0,1,float16,fp8,0,0.3453386624654134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,0,1,float16,float16,0,0.3457760016123454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,128,1,float16,fp8,0,0.26472532749176025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,128,1,fp8,fp8,0,0.24632000923156738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,0,1,float16,fp8,0,0.34700266520182294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,0,1,fp8,fp8,0,0.32232532898585003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,128,1,float16,float16,0,0.26710933446884155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,0,1,float16,float16,0,0.35146133104960126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,128,1,float16,fp8,0,0.2711413304011027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,128,1,fp8,fp8,0,0.2490826646486918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,0,1,float16,fp8,0,0.3535146713256836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,128,1,float16,float16,0,0.1630453368028005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,0,1,float16,float16,0,0.21070400873819986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,0,1,fp8,fp8,0,0.32658666372299194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,128,1,float16,fp8,0,0.16774400075276694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,128,1,fp8,fp8,0,0.1583146651585897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,0,1,float16,fp8,0,0.21547732750574747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,0,1,fp8,fp8,0,0.20053333044052124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,128,1,float16,float16,0,0.14015466968218485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,0,1,float16,float16,0,0.18597332636515299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,128,1,fp8,fp8,0,0.13015466928482056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,128,1,float16,fp8,0,0.14247467120488486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,0,1,float16,fp8,0,0.18823466698328653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,0,1,fp8,fp8,0,0.17309866348902384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,128,1,float16,float16,0,0.1404266655445099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,0,1,float16,float16,0,0.1863306760787964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,128,1,float16,fp8,0,0.14216533303260803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,128,1,fp8,fp8,0,0.13395200173060098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,0,1,float16,fp8,0,0.18875199556350708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,0,1,fp8,fp8,0,0.17522132396697998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,128,1,float16,float16,0,0.14301333824793497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,0,1,float16,float16,0,0.19056532780329385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,128,1,float16,fp8,0,0.14568000038464865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,128,1,fp8,fp8,0,0.14007467031478882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,0,1,float16,fp8,0,0.19239999850591025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,128,1,float16,float16,0,0.09317866961161296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,0,1,fp8,fp8,0,0.1811199982961019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,0,1,float16,float16,0,0.12159466743469238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,128,1,float16,fp8,0,0.09564800063769023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,128,1,fp8,fp8,0,0.09512000282605489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,0,1,float16,fp8,0,0.12249599893887837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,0,1,fp8,fp8,0,0.12006400028864543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,128,1,float16,float16,0,0.08461333314577739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,0,1,float16,float16,0,0.11359467109044392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,128,1,float16,fp8,0,0.08493866523106892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,128,1,fp8,fp8,0,0.07865066826343536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,0,1,float16,fp8,0,0.11185066898663838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,128,1,float16,float16,0,0.08597333232561748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,0,1,fp8,fp8,0,0.1055573324362437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,0,1,float16,float16,0,0.11129599809646606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,128,1,float16,fp8,0,0.08540800213813782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,128,1,fp8,fp8,0,0.08044800162315369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,0,1,float16,fp8,0,0.11379200220108032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,0,1,fp8,fp8,0,0.10401599605878194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,128,1,float16,float16,0,0.0848586658636729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,0,1,float16,float16,0,0.11239999532699585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,128,1,float16,fp8,0,0.0867786705493927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,128,1,fp8,fp8,0,0.08032533526420593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,0,1,float16,fp8,0,0.11401599645614624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,128,1,float16,float16,0,0.06836799780527751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,0,1,fp8,fp8,0,0.10521599650382996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,0,1,float16,float16,0,0.08990400036176045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,128,1,float16,fp8,0,0.06921066840489705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,128,1,fp8,fp8,0,0.06621866424878438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,0,1,float16,fp8,0,0.08902399738629659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,0,1,fp8,fp8,0,0.08492799599965413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,128,1,float16,float16,0,0.06866666674613953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,128,1,float16,fp8,0,0.07047999898592631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,0,1,float16,float16,0,0.08854400118192036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,128,1,fp8,fp8,0,0.06619200110435486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,0,1,float16,fp8,0,0.08921066919962566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,0,1,fp8,fp8,0,0.0846453309059143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,128,1,float16,float16,0,0.06837333242098491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,0,1,float16,float16,0,0.08894933263460796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,128,1,float16,fp8,0,0.06865066786607106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,128,1,fp8,fp8,0,0.06446399788061778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,128,1,float16,float16,0,0.06831466654936473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,0,1,float16,fp8,0,0.08924266695976257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,0,1,fp8,fp8,0,0.08307200173536937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,0,1,float16,float16,0,0.08899199962615967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,128,1,float16,fp8,0,0.06833600004514058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,128,1,fp8,fp8,0,0.0665280024210612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,0,1,fp8,fp8,0,0.08477866649627686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,0,1,float16,fp8,0,0.08820266524950664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,128,1,float16,float16,0,2.289034684499105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,128,1,float16,fp8,0,2.274661382039388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,0,1,float16,float16,0,2.6805121103922525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,128,1,fp8,fp8,0,2.231856028238932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,0,1,float16,fp8,0,2.6766398747762046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,0,1,fp8,fp8,0,2.5766666730244956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,128,1,float16,float16,0,2.2996373176574707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,128,1,fp8,fp8,0,2.285050710042318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,128,1,float16,fp8,0,2.28328529993693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,0,1,float16,float16,0,2.6889120737711587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,0,1,float16,fp8,0,2.6941760381062827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,128,1,float16,float16,0,2.348298708597819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,0,1,fp8,fp8,0,2.647856076558431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,0,1,float16,float16,0,2.716453234354655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,128,1,float16,fp8,0,2.3184053103129068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,128,1,fp8,fp8,0,2.2470134099324546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,0,1,float16,fp8,0,2.693584124247233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,0,1,fp8,fp8,0,2.6352319717407227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,128,1,float16,float16,0,1.2408373355865479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,0,1,float16,float16,0,1.4552906354268391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,128,1,float16,fp8,0,1.2148746649424236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,128,1,fp8,fp8,0,1.240613301595052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,0,1,float16,fp8,0,1.4355626106262207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,128,1,float16,float16,0,1.1525013446807861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,0,1,fp8,fp8,0,1.4270133972167969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,0,1,float16,float16,0,1.3528587023417156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,128,1,float16,fp8,0,1.1488640308380127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,128,1,fp8,fp8,0,1.0780746936798096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,0,1,float16,fp8,0,1.3511306444803874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,0,1,fp8,fp8,0,1.255413293838501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,128,1,float16,float16,0,1.1560693581899006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,0,1,float16,float16,0,1.35261869430542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,128,1,float16,fp8,0,1.1521493593851726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,128,1,fp8,fp8,0,1.1031733353932698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,0,1,float16,fp8,0,1.3539093335469563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,0,1,fp8,fp8,0,1.3024160067240398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,128,1,float16,float16,0,1.164090633392334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,0,1,float16,float16,0,1.3638453483581543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,128,1,float16,fp8,0,1.1592586835225422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,128,1,fp8,fp8,0,1.100709358851115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,128,1,float16,float16,0,0.6321386496225992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,0,1,float16,fp8,0,1.3600959777832031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,0,1,fp8,fp8,0,1.2839840253194172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,128,1,float16,fp8,0,0.6190986633300781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,0,1,float16,float16,0,0.7391466299692789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,128,1,fp8,fp8,0,0.6171199878056844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,0,1,float16,fp8,0,0.7290666898091634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,128,1,float16,float16,0,0.5854186614354452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,0,1,fp8,fp8,0,0.710863987604777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,0,1,float16,float16,0,0.68722136815389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,128,1,float16,fp8,0,0.5844906568527222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,128,1,fp8,fp8,0,0.5450186729431152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,0,1,float16,fp8,0,0.687818686167399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,0,1,fp8,fp8,0,0.6364373366038004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,128,1,float16,float16,0,0.587002674738566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,0,1,float16,float16,0,0.6893813610076904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,128,1,float16,fp8,0,0.5875093142191569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,128,1,fp8,fp8,0,0.5476799805959066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,0,1,float16,fp8,0,0.686778704325358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,0,1,fp8,fp8,0,0.6395306587219238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,128,1,float16,float16,0,0.5928800106048584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,0,1,float16,float16,0,0.6955786546071371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,128,1,float16,fp8,0,0.5896639823913574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,128,1,fp8,fp8,0,0.5542186498641968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,0,1,float16,fp8,0,0.6946720282236735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,128,1,float16,float16,0,0.3307573397954305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,0,1,float16,float16,0,0.38629333178202313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,128,1,float16,fp8,0,0.32180800040562946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,0,1,fp8,fp8,0,0.6503359874089559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,128,1,fp8,fp8,0,0.3195786674817403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,0,1,float16,fp8,0,0.3799146811167399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,128,1,float16,float16,0,0.30390934149424237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,0,1,fp8,fp8,0,0.3679200013478597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,0,1,float16,float16,0,0.35602132479349774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,128,1,float16,fp8,0,0.3016960024833679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,128,1,fp8,fp8,0,0.28352532784144086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,0,1,float16,fp8,0,0.356879989306132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,0,1,fp8,fp8,0,0.33196266492207843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,128,1,float16,float16,0,0.3039733370145162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,0,1,float16,float16,0,0.35728001594543457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,128,1,float16,fp8,0,0.3042293389638265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,128,1,fp8,fp8,0,0.28339733680089313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,0,1,float16,fp8,0,0.355840007464091
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,0,1,fp8,fp8,0,0.3330026666323344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,128,1,float16,float16,0,0.308570663134257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,0,1,float16,float16,0,0.36029334863026935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,128,1,float16,fp8,0,0.3067786693572998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,128,1,fp8,fp8,0,0.2895466685295105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,0,1,float16,fp8,0,0.36076800028483075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,128,1,float16,float16,0,0.17441066106160483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,0,1,fp8,fp8,0,0.3383093277613322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,0,1,float16,float16,0,0.2052639921506246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,128,1,float16,fp8,0,0.17197332779566446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,128,1,fp8,fp8,0,0.1697333256403605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,0,1,float16,fp8,0,0.202074666817983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,128,1,float16,float16,0,0.15941866238911948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,0,1,fp8,fp8,0,0.19644800821940103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,0,1,float16,float16,0,0.18724799156188965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,128,1,float16,fp8,0,0.15914666652679443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,128,1,fp8,fp8,0,0.15026133259137472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,0,1,float16,fp8,0,0.18851200739542642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,0,1,fp8,fp8,0,0.1765706737836202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,128,1,float16,float16,0,0.16012266278266907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,128,1,float16,fp8,0,0.1616106629371643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,128,1,fp8,fp8,0,0.1523253321647644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,0,1,float16,float16,0,0.18874132633209229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,0,1,float16,fp8,0,0.1888266603151957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,128,1,float16,float16,0,0.1622986694176992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,0,1,fp8,fp8,0,0.1783626675605774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,128,1,fp8,fp8,0,0.15548800428708395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,128,1,float16,fp8,0,0.16268799702326456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,0,1,float16,float16,0,0.1914880077044169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,128,1,float16,float16,0,0.09852266311645508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,0,1,float16,fp8,0,0.18984532356262207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,0,1,fp8,fp8,0,0.18057066202163696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,0,1,float16,float16,0,0.11581866939862569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,128,1,float16,fp8,0,0.09694400429725647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,128,1,fp8,fp8,0,0.09802132844924927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,0,1,float16,fp8,0,0.11504532893498738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,128,1,float16,float16,0,0.08953600128491719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,0,1,fp8,fp8,0,0.11336533228556316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,0,1,float16,float16,0,0.10707199573516846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,128,1,float16,fp8,0,0.08995200196901958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,128,1,fp8,fp8,0,0.08211733400821686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,0,1,float16,fp8,0,0.10717866818110149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,0,1,fp8,fp8,0,0.09718400239944458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,128,1,float16,float16,0,0.08996267120043437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,0,1,float16,float16,0,0.10586667060852051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,128,1,float16,fp8,0,0.08901866277058919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,128,1,fp8,fp8,0,0.08241599798202515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,0,1,float16,fp8,0,0.10600533088048299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,0,1,fp8,fp8,0,0.09753599762916565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,128,1,float16,float16,0,0.08955732981363933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,128,1,float16,fp8,0,0.08916266759236653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,0,1,float16,float16,0,0.10689066847165425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,128,1,fp8,fp8,0,0.08361066381136577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,128,1,float16,float16,0,0.05686399837334951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,0,1,float16,fp8,0,0.10654399792353313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,0,1,fp8,fp8,0,0.09861333171526591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,0,1,float16,float16,0,0.06669333577156067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,128,1,float16,fp8,0,0.05612266560395559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,128,1,fp8,fp8,0,0.05579199890295664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,0,1,float16,fp8,0,0.0666133314371109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,0,1,fp8,fp8,0,0.06460266808668773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,128,1,float16,float16,0,0.05452266832192739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,0,1,float16,float16,0,0.06490666667620341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,128,1,float16,fp8,0,0.05412800113360087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,128,1,fp8,fp8,0,0.05182399849096934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,0,1,float16,fp8,0,0.06497600177923839
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,0,1,fp8,fp8,0,0.06022400160630544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,128,1,float16,float16,0,0.054058666030565895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,0,1,float16,float16,0,0.06514133512973785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,128,1,float16,fp8,0,0.05436266462008158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,128,1,fp8,fp8,0,0.05077866713205973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,0,1,float16,fp8,0,0.06414400041103363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,0,1,fp8,fp8,0,0.06039466460545858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,128,1,float16,fp8,0,0.05385600030422211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,0,1,float16,float16,0,0.06504533191521962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,128,1,fp8,fp8,0,0.05115733544031779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,128,1,float16,float16,0,0.05490666627883911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,0,1,float16,fp8,0,0.06440000236034393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,128,1,float16,float16,0,0.03967999915281931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,0,1,fp8,fp8,0,0.06150400141874949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,0,1,float16,float16,0,0.04640000065167745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,128,1,float16,fp8,0,0.039493332306543984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,128,1,fp8,fp8,0,0.03965866565704346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,0,1,float16,fp8,0,0.04553066690762838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,0,1,fp8,fp8,0,0.04391466577847799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,128,1,float16,float16,0,0.039077334105968475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,0,1,float16,float16,0,0.04435733457406362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,128,1,float16,fp8,0,0.03773866593837738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,0,1,float16,fp8,0,0.045328001181284584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,128,1,fp8,fp8,0,0.03661333272854487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,0,1,fp8,fp8,0,0.04208533465862274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,128,1,float16,float16,0,0.0378506655494372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,0,1,float16,float16,0,0.04558933277924856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,128,1,fp8,fp8,0,0.03677333394686381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,0,1,float16,fp8,0,0.045109331607818604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,128,1,float16,fp8,0,0.038634667793909706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,0,1,fp8,fp8,0,0.04373333354791006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,128,1,float16,float16,0,0.03754666695992152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,0,1,float16,float16,0,0.045994664231936135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,128,1,float16,fp8,0,0.0401653324564298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,128,1,fp8,fp8,0,0.03774933268626531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,0,1,float16,fp8,0,0.04586666822433472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,0,1,fp8,fp8,0,0.04399999976158142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,128,1,float16,float16,0,2.223130702972412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,0,1,float16,float16,0,2.253573258717855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,128,1,float16,fp8,0,2.215493361155192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,128,1,fp8,fp8,0,2.1590986251831055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,0,1,float16,fp8,0,2.2428959210713706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,0,1,fp8,fp8,0,2.181861400604248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,128,1,float16,float16,0,2.245317300160726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,0,1,float16,float16,0,2.260805288950602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,128,1,float16,fp8,0,2.21724796295166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,128,1,fp8,fp8,0,2.222485383351644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,0,1,float16,fp8,0,2.25545597076416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,0,1,fp8,fp8,0,2.245946725209554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,128,1,float16,float16,0,2.2873279253641763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,0,1,float16,float16,0,2.2858880360921225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,128,1,float16,fp8,0,2.257381280263265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,128,1,fp8,fp8,0,2.2114826838175454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,0,1,float16,fp8,0,2.3123413721720376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,128,1,float16,float16,0,1.207477331161499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,0,1,float16,float16,0,1.2291786670684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,128,1,float16,fp8,0,1.1958133379618328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,0,1,fp8,fp8,0,2.2322667439778647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,128,1,fp8,fp8,0,1.1992586453755696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,0,1,float16,fp8,0,1.2143146991729736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,0,1,fp8,fp8,0,1.2107893625895183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,128,1,float16,float16,0,1.1176053682963054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,0,1,float16,float16,0,1.1336159706115723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,128,1,float16,fp8,0,1.1154560248057048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,128,1,fp8,fp8,0,1.042522668838501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,0,1,float16,fp8,0,1.1326826413472493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,0,1,fp8,fp8,0,1.0657227039337158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,128,1,float16,float16,0,1.1201653480529785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,0,1,float16,float16,0,1.1333866914113362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,128,1,float16,fp8,0,1.1153279940287273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,128,1,fp8,fp8,0,1.0823840300242107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,0,1,float16,fp8,0,1.135482629140218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,0,1,fp8,fp8,0,1.0995840231577556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,128,1,float16,float16,0,1.1266506512959797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,0,1,float16,float16,0,1.1475893656412761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,128,1,float16,fp8,0,1.1229493618011475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,128,1,fp8,fp8,0,1.0755146344502766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,0,1,float16,fp8,0,1.13865065574646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,128,1,float16,float16,0,0.614245335261027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,0,1,float16,float16,0,0.6275893449783325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,0,1,fp8,fp8,0,1.0905546347300212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,128,1,float16,fp8,0,0.6023039817810059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,128,1,fp8,fp8,0,0.600218653678894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,0,1,float16,fp8,0,0.6165759960810343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,0,1,fp8,fp8,0,0.6094026565551758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,128,1,float16,float16,0,0.5685439904530843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,0,1,float16,float16,0,0.5773546695709229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,128,1,float16,fp8,0,0.5686453183492025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,128,1,fp8,fp8,0,0.530677318572998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,0,1,float16,fp8,0,0.5760639905929565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,0,1,fp8,fp8,0,0.5340426762898763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,128,1,float16,float16,0,0.5707093477249146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,0,1,float16,float16,0,0.580127994219462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,128,1,float16,fp8,0,0.5684533516565958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,128,1,fp8,fp8,0,0.5319626728693644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,0,1,float16,fp8,0,0.5765866835912069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,0,1,fp8,fp8,0,0.539247989654541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,128,1,float16,float16,0,0.5737706820170084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,0,1,float16,float16,0,0.5835520029067993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,128,1,fp8,fp8,0,0.5398826599121094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,128,1,float16,fp8,0,0.572160005569458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,0,1,float16,fp8,0,0.5820639928181967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,128,1,float16,float16,0,0.32119999329249066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,0,1,fp8,fp8,0,0.5456746816635132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,0,1,float16,float16,0,0.3251413305600484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,128,1,float16,fp8,0,0.3144960006078084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,128,1,fp8,fp8,0,0.31113600730895996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,0,1,float16,fp8,0,0.3214400013287862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,0,1,fp8,fp8,0,0.3148426612218221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,128,1,float16,float16,0,0.2937120000521342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,0,1,float16,float16,0,0.2992960015932719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,128,1,fp8,fp8,0,0.274671991666158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,128,1,float16,fp8,0,0.29524266719818115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,0,1,float16,fp8,0,0.29818665981292725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,0,1,fp8,fp8,0,0.2773333390553792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,128,1,float16,float16,0,0.295360008875529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,0,1,float16,float16,0,0.30055467287699383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,128,1,float16,fp8,0,0.29448533058166504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,128,1,fp8,fp8,0,0.2758986751238505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,0,1,float16,fp8,0,0.298527995745341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,0,1,fp8,fp8,0,0.2792746623357137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,128,1,float16,float16,0,0.2980106671651204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,0,1,float16,float16,0,0.3038026690483093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,128,1,float16,fp8,0,0.29915199677149457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,128,1,fp8,fp8,0,0.279914657274882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,0,1,float16,fp8,0,0.3038826584815979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,128,1,float16,float16,0,0.17101333538691202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,0,1,float16,float16,0,0.17407466967900595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,0,1,fp8,fp8,0,0.2847946683565776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,128,1,float16,fp8,0,0.16921599706013998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,128,1,fp8,fp8,0,0.16715733210245767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,0,1,float16,fp8,0,0.17115734020868936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,0,1,fp8,fp8,0,0.16937067111333212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,128,1,float16,float16,0,0.15524799625078836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,0,1,float16,float16,0,0.1583626667658488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,128,1,float16,fp8,0,0.1551359991232554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,128,1,fp8,fp8,0,0.14680000146230063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,0,1,float16,fp8,0,0.15827199816703796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,0,1,fp8,fp8,0,0.14867732922236124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,128,1,float16,float16,0,0.15587733189264932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,0,1,float16,float16,0,0.15824533502260843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,128,1,float16,fp8,0,0.15682666500409445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,128,1,fp8,fp8,0,0.14685333768526712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,0,1,float16,fp8,0,0.15848533312479654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,0,1,fp8,fp8,0,0.14993066589037576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,128,1,float16,float16,0,0.1574666698773702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,0,1,float16,float16,0,0.16078933080037436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,128,1,float16,fp8,0,0.15957333644231161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,128,1,fp8,fp8,0,0.15032000343004862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,0,1,float16,fp8,0,0.16050666570663452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,0,1,fp8,fp8,0,0.1545973320802053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,128,1,float16,float16,0,0.09680533409118652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,0,1,float16,float16,0,0.09809066851933797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,128,1,float16,fp8,0,0.09396800398826599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,128,1,fp8,fp8,0,0.0974133312702179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,0,1,float16,fp8,0,0.09635733564694722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,0,1,fp8,fp8,0,0.09656000137329102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,128,1,float16,float16,0,0.08683199683825175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,0,1,float16,float16,0,0.08870933453241985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,128,1,float16,fp8,0,0.08683199683825175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,128,1,fp8,fp8,0,0.08101333181063335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,0,1,float16,fp8,0,0.08742933471997578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,0,1,fp8,fp8,0,0.08089066545168559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,128,1,float16,float16,0,0.08778666456540425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,0,1,float16,float16,0,0.08737066388130188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,128,1,float16,fp8,0,0.08780266841252644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,128,1,fp8,fp8,0,0.08160000046094258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,0,1,float16,fp8,0,0.08824533224105835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,0,1,fp8,fp8,0,0.08266133566697438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,128,1,float16,float16,0,0.0888320008913676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,0,1,float16,float16,0,0.08890133102734883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,128,1,float16,fp8,0,0.08901866277058919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,128,1,fp8,fp8,0,0.08250666658083598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,0,1,float16,fp8,0,0.08954133590062459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,0,1,fp8,fp8,0,0.08418666323026021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,128,1,float16,float16,0,0.05474133292833964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,0,1,float16,float16,0,0.0561706672112147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,128,1,float16,fp8,0,0.05420800050099691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,128,1,fp8,fp8,0,0.05303466816743215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,0,1,float16,fp8,0,0.05585599939028422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,0,1,fp8,fp8,0,0.05407999952634176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,128,1,float16,float16,0,0.05179200073083242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,128,1,float16,fp8,0,0.05398933092753092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,0,1,float16,float16,0,0.05386666456858317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,128,1,fp8,fp8,0,0.04957333207130432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,0,1,float16,fp8,0,0.05409599840641022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,0,1,fp8,fp8,0,0.05110399921735128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,128,1,float16,float16,0,0.05373333394527435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,128,1,float16,fp8,0,0.054144000013669334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,0,1,float16,float16,0,0.05425600210825602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,128,1,fp8,fp8,0,0.0499893327554067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,0,1,float16,fp8,0,0.05409066875775655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,0,1,fp8,fp8,0,0.0499839981396993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,128,1,float16,float16,0,0.05398933092753092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,0,1,float16,float16,0,0.05413866539796194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,128,1,float16,fp8,0,0.05316799879074097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,128,1,fp8,fp8,0,0.05020266771316528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,0,1,float16,fp8,0,0.05402133365472158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,0,1,fp8,fp8,0,0.05065600077311198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,128,1,float16,float16,0,0.037647999823093414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,0,1,float16,float16,0,0.037952000896135964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,128,1,float16,fp8,0,0.03761066744724909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,128,1,fp8,fp8,0,0.03769599894682566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,0,1,float16,fp8,0,0.039061332742373146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,0,1,fp8,fp8,0,0.037685332198937736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,128,1,float16,float16,0,0.03793599953254064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,0,1,float16,float16,0,0.037658666570981346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,128,1,float16,fp8,0,0.03748266647259394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,128,1,fp8,fp8,0,0.03602666656176249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,0,1,float16,fp8,0,0.03783999880154928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,0,1,fp8,fp8,0,0.035429333647092186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,128,1,float16,float16,0,0.03773866593837738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,0,1,float16,float16,0,0.037578667203585304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,128,1,float16,fp8,0,0.03608533243338267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,128,1,fp8,fp8,0,0.03562666724125544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,0,1,float16,fp8,0,0.03824000060558319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,0,1,fp8,fp8,0,0.03570666660865148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,128,1,float16,float16,0,0.037685332198937736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,0,1,float16,float16,0,0.037733333806196846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,128,1,float16,fp8,0,0.03736533224582672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,128,1,fp8,fp8,0,0.0356480007370313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,0,1,float16,fp8,0,0.03769599894682566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,0,1,fp8,fp8,0,0.03613866617282232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,128,1,float16,float16,0,0.0252960001428922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,0,1,float16,float16,0,0.02754133443037669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,128,1,float16,fp8,0,0.027290667096773785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,128,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,0,1,fp8,fp8,0,0.02714666724205017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,128,1,float16,float16,0,0.02743999908367793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,0,1,float16,float16,0,0.02714666724205017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,0,1,float16,fp8,0,0.028533334533373516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,128,1,fp8,fp8,0,0.02526933451493581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,128,1,float16,fp8,0,0.027024000883102417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,0,1,float16,fp8,0,0.026608000199000042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,0,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,128,1,float16,float16,0,0.02537599951028824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,0,1,float16,float16,0,0.025493333737055462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,128,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,128,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,0,1,float16,fp8,0,0.027514666318893433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,0,1,fp8,fp8,0,0.025199999411900837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,128,1,float16,float16,0,0.025370667378107708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,0,1,float16,float16,0,0.02773866554101308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,128,1,float16,fp8,0,0.025498665869235992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,128,1,fp8,fp8,0,0.025349333882331848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,0,1,float16,fp8,0,0.0276053324341774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,0,1,fp8,fp8,0,0.025226667523384094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,0,1,float16,float16,0,1.0153439839680989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,128,1,float16,float16,0,1.0366826852162678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,128,1,float16,fp8,0,1.0319519837697346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,128,1,fp8,fp8,0,0.9773386319478353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,0,1,float16,fp8,0,1.0134027004241943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,0,1,fp8,fp8,0,0.953279972076416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,128,1,float16,float16,0,1.0402026971181233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,0,1,float16,float16,0,1.0183626810709636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,128,1,float16,fp8,0,1.034277359644572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,128,1,fp8,fp8,0,1.026250680287679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,0,1,float16,fp8,0,1.0178453127543132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,0,1,fp8,fp8,0,1.0035040378570557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,128,1,float16,float16,0,1.057423988978068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,0,1,float16,float16,0,1.0341599782307942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,128,1,float16,fp8,0,1.0466240247090657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,128,1,fp8,fp8,0,1.005184014638265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,128,1,float16,float16,0,0.5772426525751749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,0,1,float16,float16,0,0.565285325050354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,0,1,fp8,fp8,0,0.9839519659678141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,0,1,float16,fp8,0,1.0292906761169434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,128,1,float16,fp8,0,0.5664746761322021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,128,1,fp8,fp8,0,0.5642826557159424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,0,1,float16,fp8,0,0.5558186769485474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,0,1,fp8,fp8,0,0.552346666653951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,128,1,float16,float16,0,0.5260479847590128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,0,1,float16,float16,0,0.5155093272527059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,128,1,fp8,fp8,0,0.4915573199590047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,128,1,float16,fp8,0,0.5261333386103312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,0,1,float16,fp8,0,0.5136160055796305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,0,1,fp8,fp8,0,0.47811198234558105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,128,1,float16,float16,0,0.5307626724243164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,0,1,float16,float16,0,0.518336017926534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,128,1,float16,fp8,0,0.5271946589152018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,128,1,fp8,fp8,0,0.5008746782938639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,0,1,fp8,fp8,0,0.4796746571858724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,0,1,float16,fp8,0,0.5170133511225382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,128,1,float16,float16,0,0.537008007367452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,0,1,float16,float16,0,0.5245279868443807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,128,1,fp8,fp8,0,0.5041120052337646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,128,1,float16,fp8,0,0.5333066781361898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,0,1,float16,fp8,0,0.5228480100631714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,0,1,float16,float16,0,0.29612799485524494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,128,1,float16,float16,0,0.30082666873931885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,0,1,fp8,fp8,0,0.4897119998931885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,128,1,float16,fp8,0,0.29422932863235474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,128,1,fp8,fp8,0,0.2937813401222229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,0,1,float16,fp8,0,0.28994667530059814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,128,1,float16,float16,0,0.27448532978693646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,0,1,fp8,fp8,0,0.28853867451349896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,0,1,float16,float16,0,0.2691413362820943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,128,1,float16,fp8,0,0.27238933245340985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,128,1,fp8,fp8,0,0.25549866755803424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,0,1,float16,fp8,0,0.26850666602452594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,0,1,fp8,fp8,0,0.24885332584381104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,128,1,float16,float16,0,0.275711993376414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,0,1,float16,float16,0,0.27031999826431274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,128,1,float16,fp8,0,0.27433600028355914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,128,1,fp8,fp8,0,0.2584106723467509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,0,1,float16,fp8,0,0.27002666393915814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,0,1,fp8,fp8,0,0.2513013283411662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,128,1,float16,float16,0,0.27800534168879193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,0,1,float16,float16,0,0.27365867296854657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,128,1,float16,fp8,0,0.2783786654472351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,128,1,fp8,fp8,0,0.26280534267425537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,0,1,float16,fp8,0,0.27340267101923627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,128,1,float16,float16,0,0.16175466775894165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,0,1,fp8,fp8,0,0.255786657333374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,0,1,float16,float16,0,0.1567306617895762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,128,1,float16,fp8,0,0.15874666968981424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,128,1,fp8,fp8,0,0.15963199734687805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,0,1,float16,fp8,0,0.15497600038846335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,0,1,fp8,fp8,0,0.15495999654134116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,128,1,float16,float16,0,0.14591999848683676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,0,1,float16,float16,0,0.14425599575042725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,128,1,float16,fp8,0,0.14717866977055868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,128,1,fp8,fp8,0,0.13737066586812338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,0,1,float16,fp8,0,0.1425386667251587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,0,1,fp8,fp8,0,0.13407466808954874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,128,1,float16,float16,0,0.14689067006111145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,0,1,float16,float16,0,0.14482133587201437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,128,1,float16,fp8,0,0.14626666903495789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,128,1,fp8,fp8,0,0.13766400019327799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,0,1,float16,fp8,0,0.1437173287073771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,0,1,fp8,fp8,0,0.13577600320180258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,128,1,float16,float16,0,0.14806399742762247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,0,1,float16,float16,0,0.14586666226387024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,128,1,float16,fp8,0,0.14878400166829428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,128,1,fp8,fp8,0,0.14140799641609192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,0,1,float16,fp8,0,0.14622400204340616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,0,1,fp8,fp8,0,0.13714133699735007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,128,1,float16,float16,0,0.09148266911506653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,0,1,float16,float16,0,0.08917867143948872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,128,1,float16,fp8,0,0.09077866872151692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,128,1,fp8,fp8,0,0.0918933351834615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,0,1,float16,fp8,0,0.08901333808898926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,0,1,fp8,fp8,0,0.0913759966691335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,128,1,float16,float16,0,0.08383466800053914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,0,1,float16,float16,0,0.08227733274300893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,128,1,float16,fp8,0,0.083146666487058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,128,1,fp8,fp8,0,0.07673599819342296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,0,1,float16,fp8,0,0.08100266754627228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,0,1,fp8,fp8,0,0.07486933469772339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,128,1,float16,float16,0,0.08283199866612752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,0,1,float16,float16,0,0.08130133152008057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,128,1,float16,fp8,0,0.08456533153851827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,128,1,fp8,fp8,0,0.07713066538174947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,0,1,float16,fp8,0,0.08276799817879994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,0,1,fp8,fp8,0,0.07630399862925212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,128,1,float16,float16,0,0.08322666585445404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,0,1,float16,float16,0,0.0830026666323344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,128,1,float16,fp8,0,0.08476266264915466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,128,1,fp8,fp8,0,0.07877866427103679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,0,1,float16,fp8,0,0.08293333152929942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,0,1,fp8,fp8,0,0.07668266693751018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,128,1,float16,float16,0,0.0517546683549881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,0,1,float16,float16,0,0.050794666012128196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,128,1,float16,fp8,0,0.05096533397833506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,128,1,fp8,fp8,0,0.049925332268079124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,0,1,float16,fp8,0,0.0499946673711141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,0,1,fp8,fp8,0,0.04942933221658071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,128,1,float16,float16,0,0.04891733328501383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,0,1,float16,float16,0,0.04794133206208547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,128,1,float16,fp8,0,0.04982399940490723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,128,1,fp8,fp8,0,0.04765866696834564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,0,1,float16,fp8,0,0.04789866507053375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,0,1,fp8,fp8,0,0.045370668172836304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,128,1,float16,float16,0,0.049839998284975685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,0,1,float16,float16,0,0.047914668917655945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,128,1,float16,fp8,0,0.050111999114354454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,128,1,fp8,fp8,0,0.047370667258898415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,0,1,float16,fp8,0,0.049471999208132424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,0,1,fp8,fp8,0,0.045909335215886436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,128,1,float16,float16,0,0.05145066479841868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,0,1,float16,float16,0,0.04987200101216634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,128,1,float16,fp8,0,0.04993600149949392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,128,1,fp8,fp8,0,0.045935998360315956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,0,1,float16,fp8,0,0.04929066697756449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,0,1,fp8,fp8,0,0.045968001087506614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,128,1,float16,float16,0,0.035455999275048576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,0,1,float16,float16,0,0.03586133321126302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,128,1,float16,fp8,0,0.03733866661787033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,128,1,fp8,fp8,0,0.03573866685231527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,0,1,float16,fp8,0,0.03561066587766012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,128,1,float16,float16,0,0.035391998787721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,0,1,fp8,fp8,0,0.035349334279696144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,0,1,float16,float16,0,0.033786666889985405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,128,1,float16,fp8,0,0.036176001032193504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,128,1,fp8,fp8,0,0.033413333197434746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,0,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,0,1,fp8,fp8,0,0.03393599887688955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,128,1,float16,float16,0,0.03533866753180822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,0,1,float16,float16,0,0.03554133325815201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,128,1,float16,fp8,0,0.035562666753927864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,128,1,fp8,fp8,0,0.03355200091997782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,0,1,float16,fp8,0,0.03392533212900162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,0,1,fp8,fp8,0,0.03346666693687439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,128,1,float16,float16,0,0.035418666899204254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,0,1,float16,float16,0,0.034586665530999504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,128,1,float16,fp8,0,0.03552533437808355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,128,1,fp8,fp8,0,0.03439466655254364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,0,1,float16,fp8,0,0.03525333354870478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,0,1,fp8,fp8,0,0.03332799921433131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,128,1,float16,float16,0,0.02516799916823705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,0,1,float16,float16,0,0.023376000424226124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,128,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,128,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,0,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,0,1,fp8,fp8,0,0.025237334271272022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,128,1,float16,float16,0,0.023386667172114056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,0,1,float16,float16,0,0.025306666890780132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,128,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,128,1,fp8,fp8,0,0.023381332556406658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,0,1,float16,fp8,0,0.025098666548728943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,0,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,128,1,float16,float16,0,0.025242666403452556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,0,1,float16,float16,0,0.027514666318893433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,128,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,128,1,fp8,fp8,0,0.023530667026837666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,0,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,0,1,fp8,fp8,0,0.02306666721900304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,0,1,float16,float16,0,0.02535466601451238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,128,1,float16,fp8,0,0.023823998868465424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,128,1,float16,float16,0,0.025653332471847534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,128,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,0,1,float16,fp8,0,0.023232000569502514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,0,1,fp8,fp8,0,0.023402666052182514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,128,1,float16,float16,0,0.023215999205907185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,0,1,float16,float16,0,0.02146133283774058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,128,1,float16,fp8,0,0.02149333308140437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,0,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,128,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,128,1,float16,float16,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,128,1,float16,fp8,0,0.021530665457248688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,0,1,float16,float16,0,0.02147199958562851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,128,1,fp8,fp8,0,0.02107200026512146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,0,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,0,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,128,1,float16,float16,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,128,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,128,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,0,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,0,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,128,1,float16,float16,0,0.021359999974568684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,0,1,float16,float16,0,0.02143466720978419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,128,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,128,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,0,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,0,1,fp8,fp8,0,0.019600000232458115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,128,1,float16,float16,0,0.554805318514506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,0,1,float16,float16,0,0.5567573308944702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,128,1,float16,fp8,0,0.5551360050837199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,128,1,fp8,fp8,0,0.5251306692759196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,0,1,float16,fp8,0,0.5516906579335531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,0,1,fp8,fp8,0,0.5261813402175903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,128,1,float16,float16,0,0.5574986537297567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,0,1,float16,float16,0,0.5565813382466634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,128,1,float16,fp8,0,0.5571466684341431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,128,1,fp8,fp8,0,0.5346826712290446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,0,1,float16,fp8,0,0.5538026491800944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,0,1,fp8,fp8,0,0.5346399943033854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,128,1,float16,float16,0,0.5629386504491171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,0,1,float16,float16,0,0.5676373243331909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,128,1,float16,fp8,0,0.561189333597819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,128,1,fp8,fp8,0,0.540341337521871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,0,1,float16,fp8,0,0.5622506539026896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,0,1,fp8,fp8,0,0.541381319363912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,128,1,float16,float16,0,0.3104959925015767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,0,1,float16,float16,0,0.31167999903361004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,128,1,float16,fp8,0,0.3067520062128703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,128,1,fp8,fp8,0,0.3070346713066101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,0,1,float16,fp8,0,0.3067733248074849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,0,1,fp8,fp8,0,0.3060213327407837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,128,1,float16,float16,0,0.28700800736745197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,0,1,float16,float16,0,0.2866719961166382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,128,1,float16,fp8,0,0.28571732838948566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,128,1,fp8,fp8,0,0.27046932776769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,0,1,float16,fp8,0,0.2861386736234029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,0,1,fp8,fp8,0,0.2708853284517924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,128,1,float16,float16,0,0.28750399748484295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,0,1,float16,float16,0,0.2868746717770894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,128,1,float16,fp8,0,0.2852426568667094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,128,1,fp8,fp8,0,0.2738506595293681
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,0,1,float16,fp8,0,0.2856000065803528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,0,1,fp8,fp8,0,0.27480000257492065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,128,1,float16,float16,0,0.29043734073638916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,0,1,float16,float16,0,0.29029866059621173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,128,1,float16,fp8,0,0.29024000962575275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,128,1,fp8,fp8,0,0.2786933382352193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,0,1,float16,fp8,0,0.2906186580657959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,128,1,float16,float16,0,0.16498133540153503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,0,1,fp8,fp8,0,0.2765226761500041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,0,1,float16,float16,0,0.16556266943613687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,128,1,float16,fp8,0,0.1629813313484192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,128,1,fp8,fp8,0,0.1646666626135508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,0,1,float16,fp8,0,0.1625653306643168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,0,1,fp8,fp8,0,0.1637333333492279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,128,1,float16,float16,0,0.15213867028554282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,0,1,float16,float16,0,0.15339199701944986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,128,1,float16,fp8,0,0.15159466862678528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,128,1,fp8,fp8,0,0.14366933703422546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,0,1,float16,fp8,0,0.15215466419855753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,0,1,fp8,fp8,0,0.14417599638303122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,128,1,float16,float16,0,0.15247467160224915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,0,1,float16,float16,0,0.15249066551526388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,128,1,float16,fp8,0,0.15203733245531717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,128,1,fp8,fp8,0,0.14476799964904785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,0,1,float16,fp8,0,0.15221866965293884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,0,1,fp8,fp8,0,0.14460800091425577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,128,1,float16,float16,0,0.15423466761906943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,0,1,float16,float16,0,0.15465600291887918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,128,1,float16,fp8,0,0.15342400471369425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,128,1,fp8,fp8,0,0.1483733355998993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,0,1,float16,fp8,0,0.1546346644560496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,128,1,float16,float16,0,0.09273067116737366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,0,1,fp8,fp8,0,0.14828800161679587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,0,1,float16,float16,0,0.09340799848238628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,128,1,float16,fp8,0,0.09218133489290874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,128,1,fp8,fp8,0,0.09422933061917622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,0,1,float16,fp8,0,0.09149332841237386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,0,1,fp8,fp8,0,0.09356266260147095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,128,1,float16,float16,0,0.08648000160853068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,0,1,float16,float16,0,0.08553600311279297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,128,1,float16,fp8,0,0.08542399605115254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,128,1,fp8,fp8,0,0.0792746643225352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,0,1,float16,fp8,0,0.08656000097592671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,0,1,fp8,fp8,0,0.08036266764005025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,128,1,float16,float16,0,0.08602133393287659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,0,1,float16,float16,0,0.08477333188056946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,128,1,float16,fp8,0,0.08495466907819112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,128,1,fp8,fp8,0,0.08027199904123943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,0,1,float16,fp8,0,0.0849173367023468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,0,1,fp8,fp8,0,0.08042666812737782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,128,1,float16,float16,0,0.08674666285514832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,0,1,float16,float16,0,0.08559466401735942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,128,1,float16,fp8,0,0.08593066533406575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,128,1,fp8,fp8,0,0.08096533517042796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,0,1,float16,fp8,0,0.08588266372680664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,0,1,fp8,fp8,0,0.0794239987929662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,128,1,float16,float16,0,0.05190933247407278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,0,1,float16,float16,0,0.05362666646639506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,128,1,float16,fp8,0,0.054005334774653115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,128,1,fp8,fp8,0,0.051957334081331887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,0,1,float16,fp8,0,0.05357333521048228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,0,1,fp8,fp8,0,0.05190933247407278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,128,1,float16,float16,0,0.05202133456865946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,0,1,float16,float16,0,0.050144001841545105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,128,1,float16,fp8,0,0.05089599887530009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,128,1,fp8,fp8,0,0.0492799977461497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,0,1,float16,fp8,0,0.05039466420809428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,0,1,fp8,fp8,0,0.04779199759165446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,128,1,float16,float16,0,0.05087466537952423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,0,1,float16,float16,0,0.051957334081331887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,128,1,float16,fp8,0,0.05049600203831991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,128,1,fp8,fp8,0,0.04822400212287903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,0,1,float16,fp8,0,0.052058666944503784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,0,1,fp8,fp8,0,0.04790933430194855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,128,1,float16,float16,0,0.05153066913286845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,0,1,float16,float16,0,0.051776001850763954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,128,1,float16,fp8,0,0.051925331354141235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,128,1,fp8,fp8,0,0.04818666477998098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,0,1,float16,fp8,0,0.05208000044027964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,128,1,float16,float16,0,0.03577066709597906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,0,1,fp8,fp8,0,0.04975466430187225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,0,1,float16,float16,0,0.03369066615899404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,128,1,float16,fp8,0,0.035349334279696144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,128,1,fp8,fp8,0,0.03557866563399633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,0,1,float16,fp8,0,0.03426666557788849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,0,1,fp8,fp8,0,0.03538133452335993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,128,1,float16,float16,0,0.03409066547950109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,0,1,float16,float16,0,0.033344000577926636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,128,1,float16,fp8,0,0.03431999931732813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,128,1,fp8,fp8,0,0.03169066707293192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,0,1,float16,fp8,0,0.03342933456103007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,0,1,fp8,fp8,0,0.03216533362865448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,128,1,float16,float16,0,0.03338133295377096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,0,1,float16,float16,0,0.033600000043710075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,128,1,float16,fp8,0,0.03305600086847941
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,128,1,fp8,fp8,0,0.032431999842325844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,0,1,float16,fp8,0,0.03411199897527695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,0,1,fp8,fp8,0,0.03190399954716364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,128,1,float16,float16,0,0.033615998923778534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,0,1,float16,float16,0,0.033530667424201965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,128,1,float16,fp8,0,0.03366933266321818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,128,1,fp8,fp8,0,0.03356266766786575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,0,1,float16,fp8,0,0.03469866762558619
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,0,1,fp8,fp8,0,0.0335359995563825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,128,1,float16,float16,0,0.02517866591612498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,0,1,float16,float16,0,0.02534399926662445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,128,1,float16,fp8,0,0.026922665536403656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,128,1,fp8,fp8,0,0.02553066611289978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,0,1,float16,fp8,0,0.025402667621771496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,0,1,fp8,fp8,0,0.025685332715511322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,128,1,float16,float16,0,0.025146665672461193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,0,1,float16,float16,0,0.025429333249727886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,128,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,128,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,0,1,float16,fp8,0,0.025402667621771496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,0,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,128,1,float16,float16,0,0.025285333395004272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,0,1,float16,float16,0,0.025066666305065155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,128,1,float16,fp8,0,0.02554133286078771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,128,1,fp8,fp8,0,0.025231999655564625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,0,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,0,1,fp8,fp8,0,0.025301332275072735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,128,1,float16,float16,0,0.02536533276240031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,0,1,float16,float16,0,0.025077333052953083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,128,1,float16,fp8,0,0.025546667476495106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,128,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,0,1,float16,fp8,0,0.02515200028816859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,0,1,fp8,fp8,0,0.02317333221435547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,128,1,float16,float16,0,0.018858666221300762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,128,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,128,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,0,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,128,1,float16,float16,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,0,1,float16,float16,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,128,1,float16,fp8,0,0.018837332725524902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,128,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,0,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,0,1,fp8,fp8,0,0.018922666708628338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,128,1,float16,float16,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,0,1,float16,float16,0,0.018858666221300762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,128,1,float16,fp8,0,0.018853332847356796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,128,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,0,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,0,1,float16,fp8,0,0.019727999965349834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,128,1,float16,float16,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,0,1,float16,float16,0,0.017525333911180496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,128,1,fp8,fp8,0,0.01749333366751671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,128,1,float16,fp8,0,0.020074666788180668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,128,1,float16,float16,0,0.017680000513792038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,0,1,float16,float16,0,0.019253333409627277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,128,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,128,1,fp8,fp8,0,0.017551999539136887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,0,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,0,1,fp8,fp8,0,0.01748266691962878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,128,1,float16,float16,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,128,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,0,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,0,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,0,1,float16,float16,0,0.018218666315078735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,128,1,float16,fp8,0,0.01947733387351036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,128,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,0,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,0,1,fp8,fp8,0,0.01754133279124896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,128,1,float16,float16,0,0.01854933301607768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,0,1,float16,float16,0,0.018453333526849747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,128,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,128,1,fp8,fp8,0,0.01781333362062772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,128,1,float16,float16,0,0.3882506688435872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,0,1,float16,float16,0,0.38885335127512616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,128,1,float16,fp8,0,0.3861120144526164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,128,1,fp8,fp8,0,0.3608106772104899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,0,1,float16,fp8,0,0.38809601465861004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,0,1,fp8,fp8,0,0.36046401659647626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,128,1,float16,float16,0,0.388922651608785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,0,1,float16,float16,0,0.3898613452911377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,128,1,float16,fp8,0,0.3884213368097941
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,128,1,fp8,fp8,0,0.36347198486328125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,0,1,float16,fp8,0,0.38713598251342773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,0,1,fp8,fp8,0,0.3638559977213542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,128,1,float16,float16,0,0.39352532227834064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,0,1,float16,float16,0,0.39261865615844727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,128,1,float16,fp8,0,0.3901013135910034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,128,1,fp8,fp8,0,0.3668160041173299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,0,1,fp8,fp8,0,0.36768531799316406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,0,1,float16,fp8,0,0.3910293181737264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,128,1,float16,float16,0,0.21651200453440347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,0,1,float16,float16,0,0.21381866931915283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,128,1,float16,fp8,0,0.2137653430302938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,128,1,fp8,fp8,0,0.20840533574422201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,0,1,float16,fp8,0,0.21227733294169107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,128,1,float16,float16,0,0.20356265703837076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,0,1,fp8,fp8,0,0.2092640002568563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,0,1,float16,float16,0,0.2032159964243571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,128,1,float16,fp8,0,0.20350933074951172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,128,1,fp8,fp8,0,0.18750399351119995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,0,1,float16,fp8,0,0.204367995262146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,0,1,fp8,fp8,0,0.1877653400103251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,128,1,float16,float16,0,0.20359466473261514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,0,1,float16,float16,0,0.2032960057258606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,128,1,float16,fp8,0,0.2041706641515096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,128,1,fp8,fp8,0,0.18955200910568237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,0,1,float16,fp8,0,0.20292266209920248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,0,1,fp8,fp8,0,0.18952532609303793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,0,1,float16,float16,0,0.2060426672299703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,128,1,float16,float16,0,0.20602667331695557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,128,1,fp8,fp8,0,0.19309866428375244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,128,1,float16,fp8,0,0.20489599307378134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,0,1,float16,float16,0,0.11700266599655151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,128,1,float16,float16,0,0.1176639993985494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,0,1,float16,fp8,0,0.2055466572443644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,0,1,fp8,fp8,0,0.1925119956334432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,128,1,float16,fp8,0,0.11560533444086711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,128,1,fp8,fp8,0,0.11580800016721089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,0,1,float16,fp8,0,0.11745599905649821
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,0,1,fp8,fp8,0,0.11731200416882832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,128,1,float16,float16,0,0.1095360020796458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,0,1,float16,float16,0,0.1123466690381368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,128,1,float16,fp8,0,0.10993066430091858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,128,1,fp8,fp8,0,0.10213333368301392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,0,1,float16,fp8,0,0.10938133796056111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,0,1,fp8,fp8,0,0.10123733679453532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,128,1,float16,float16,0,0.11028800408045451
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,0,1,float16,float16,0,0.10974400242169698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,128,1,float16,fp8,0,0.11079999804496765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,128,1,fp8,fp8,0,0.10162132978439331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,0,1,float16,fp8,0,0.11152533690134685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,0,1,fp8,fp8,0,0.10161067048708598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,128,1,float16,float16,0,0.11129599809646606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,0,1,float16,float16,0,0.11085866888364156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,128,1,float16,fp8,0,0.11131200194358826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,128,1,fp8,fp8,0,0.10206933816274007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,0,1,float16,fp8,0,0.10957333445549011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,128,1,float16,float16,0,0.06473599870999654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,0,1,fp8,fp8,0,0.1018399993578593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,0,1,float16,float16,0,0.0643093337615331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,128,1,float16,fp8,0,0.0647573322057724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,128,1,fp8,fp8,0,0.0641546646753947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,0,1,float16,fp8,0,0.06610666712125142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,0,1,fp8,fp8,0,0.06266133487224579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,128,1,float16,float16,0,0.06440000236034393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,0,1,float16,float16,0,0.06432533264160156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,128,1,float16,fp8,0,0.06277866661548615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,128,1,fp8,fp8,0,0.06021333237489065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,0,1,float16,fp8,0,0.06422933439413707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,0,1,fp8,fp8,0,0.0603413333495458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,128,1,float16,float16,0,0.06428800026575725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,0,1,float16,float16,0,0.06449600060780843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,128,1,float16,fp8,0,0.06426133215427399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,128,1,fp8,fp8,0,0.05970133344332377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,0,1,float16,fp8,0,0.06211733321348826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,0,1,fp8,fp8,0,0.0595360000928243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,128,1,float16,float16,0,0.06238399942715963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,0,1,float16,float16,0,0.06282666822274525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,128,1,float16,fp8,0,0.06406933565934499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,128,1,fp8,fp8,0,0.0602453351020813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,0,1,float16,fp8,0,0.06387733419736226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,128,1,float16,float16,0,0.04264000058174133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,0,1,fp8,fp8,0,0.06031466523806254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,0,1,float16,float16,0,0.04153066625197729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,128,1,float16,fp8,0,0.04179200033346812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,128,1,fp8,fp8,0,0.04147200038035711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,0,1,float16,fp8,0,0.041936000188191734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,0,1,fp8,fp8,0,0.04091199984153112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,128,1,float16,float16,0,0.04154666761557261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,0,1,float16,float16,0,0.04136000076929728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,128,1,float16,fp8,0,0.040362666050593056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,128,1,fp8,fp8,0,0.03985599925120672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,0,1,float16,fp8,0,0.04160533348719279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,0,1,fp8,fp8,0,0.03902400036652883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,128,1,float16,float16,0,0.039936001102129616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,0,1,float16,float16,0,0.042319998145103455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,128,1,float16,fp8,0,0.04180799921353658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,128,1,fp8,fp8,0,0.039359999199708305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,0,1,float16,fp8,0,0.04112533231576284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,0,1,fp8,fp8,0,0.03989866624275843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,128,1,float16,float16,0,0.041573333243529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,0,1,float16,float16,0,0.04048000027736028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,128,1,float16,fp8,0,0.041834667325019836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,128,1,fp8,fp8,0,0.039546666045983635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,0,1,float16,fp8,0,0.041802664597829185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,0,1,fp8,fp8,0,0.040218666195869446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,128,1,float16,float16,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,0,1,float16,float16,0,0.027674667537212372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,128,1,float16,fp8,0,0.02740799884001414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,128,1,fp8,fp8,0,0.02756800005833308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,0,1,float16,fp8,0,0.02749866743882497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,0,1,fp8,fp8,0,0.027327999472618103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,128,1,float16,float16,0,0.027600000301996868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,0,1,float16,float16,0,0.027221334477265675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,128,1,float16,fp8,0,0.027274665733178455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,128,1,fp8,fp8,0,0.027285332481066387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,0,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,0,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,128,1,float16,float16,0,0.027056001126766205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,0,1,float16,float16,0,0.027242665489514668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,128,1,float16,fp8,0,0.027434666951497395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,128,1,fp8,fp8,0,0.02533866713444392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,0,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,0,1,fp8,fp8,0,0.02712533374627431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,128,1,float16,float16,0,0.027509334186712902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,0,1,float16,float16,0,0.02754666656255722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,128,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,128,1,fp8,fp8,0,0.02739199995994568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,0,1,float16,fp8,0,0.02775466690460841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,0,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,128,1,float16,float16,0,0.021253332495689392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,128,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,128,1,fp8,fp8,0,0.021370666722456615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,0,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,0,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,128,1,float16,float16,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,0,1,float16,float16,0,0.020975999534130096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,128,1,float16,fp8,0,0.02162666618824005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,128,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,0,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,0,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,128,1,float16,float16,0,0.021114667256673176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,0,1,float16,float16,0,0.02109333376089732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,128,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,128,1,fp8,fp8,0,0.02144533395767212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,0,1,fp8,fp8,0,0.02091199904680252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,128,1,float16,float16,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,0,1,float16,float16,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,128,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,128,1,fp8,fp8,0,0.021370666722456615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,0,1,fp8,fp8,0,0.01937599976857503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,0,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,128,1,float16,float16,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,128,1,fp8,fp8,0,0.01809599995613098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,128,1,float16,float16,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,128,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,0,1,float16,fp8,0,0.01759999990463257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,128,1,fp8,fp8,0,0.017887999614079792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,128,1,float16,float16,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,128,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,128,1,float16,float16,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,0,1,float16,float16,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,128,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,0,1,float16,fp8,0,0.01874133323629697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,128,1,float16,float16,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,0,1,float16,float16,0,0.017530667285124462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,128,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,128,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,128,1,float16,float16,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,128,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,128,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,128,1,float16,float16,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,0,1,float16,float16,0,0.01637866720557213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,128,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,0,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,128,1,float16,float16,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,128,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,128,1,float16,float16,0,0.3087093234062195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,0,1,float16,float16,0,0.30902934074401855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,128,1,float16,fp8,0,0.3083733320236206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,128,1,fp8,fp8,0,0.27956799666086835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,0,1,fp8,fp8,0,0.2789439956347148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,0,1,float16,fp8,0,0.30821865797042847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,128,1,float16,float16,0,0.3084320028622945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,0,1,float16,float16,0,0.3074079950650533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,128,1,float16,fp8,0,0.3078239957491557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,128,1,fp8,fp8,0,0.2816693385442098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,0,1,float16,fp8,0,0.30692799886067706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,0,1,fp8,fp8,0,0.2808693250020345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,128,1,float16,float16,0,0.31055466334025067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,0,1,float16,float16,0,0.309279998143514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,128,1,float16,fp8,0,0.30871466795603436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,128,1,fp8,fp8,0,0.28494399785995483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,0,1,fp8,fp8,0,0.28436267375946045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,0,1,float16,fp8,0,0.31012799342473346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,128,1,float16,float16,0,0.17083199818929037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,128,1,float16,fp8,0,0.16849066813786825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,0,1,float16,float16,0,0.16970133781433105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,128,1,fp8,fp8,0,0.16240533192952475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,0,1,float16,fp8,0,0.16746666034062704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,0,1,fp8,fp8,0,0.1616159975528717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,128,1,float16,float16,0,0.16353066762288412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,128,1,fp8,fp8,0,0.14637333154678345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,128,1,float16,fp8,0,0.16269866625467935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,0,1,float16,float16,0,0.1632960041364034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,0,1,float16,fp8,0,0.16294399897257486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,0,1,fp8,fp8,0,0.14813866217931113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,128,1,float16,float16,0,0.16288000345230103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,0,1,float16,float16,0,0.16244799892107645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,128,1,fp8,fp8,0,0.1483466625213623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,0,1,float16,fp8,0,0.16268266240755716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,128,1,float16,fp8,0,0.16311466693878174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,0,1,fp8,fp8,0,0.1476533313592275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,128,1,float16,float16,0,0.16353066762288412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,0,1,float16,float16,0,0.16365866859753928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,128,1,float16,fp8,0,0.16451733311017355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,128,1,fp8,fp8,0,0.1490293343861898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,0,1,float16,fp8,0,0.16320000092188516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,0,1,fp8,fp8,0,0.14830399552981058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,128,1,float16,float16,0,0.09174933036168416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,0,1,float16,float16,0,0.09158399701118469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,128,1,float16,fp8,0,0.09112000465393066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,128,1,fp8,fp8,0,0.08480000495910645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,0,1,float16,fp8,0,0.09130133191744487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,128,1,float16,float16,0,0.08891200025876363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,0,1,fp8,fp8,0,0.08520533641179402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,0,1,float16,float16,0,0.08890133102734883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,128,1,float16,fp8,0,0.08916800220807393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,128,1,fp8,fp8,0,0.08184533317883809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,0,1,fp8,fp8,0,0.08291199803352356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,0,1,float16,fp8,0,0.0890880028406779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,128,1,float16,float16,0,0.08919999996821086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,0,1,float16,float16,0,0.08887466788291931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,128,1,float16,fp8,0,0.08897599577903748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,128,1,fp8,fp8,0,0.08268799881140391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,0,1,float16,fp8,0,0.08945600191752116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,0,1,fp8,fp8,0,0.08102400104204814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,128,1,float16,float16,0,0.08975467085838318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,0,1,float16,float16,0,0.08947733044624329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,128,1,float16,fp8,0,0.08907199899355571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,128,1,fp8,fp8,0,0.08251200119654338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,0,1,float16,fp8,0,0.0890826682249705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,0,1,fp8,fp8,0,0.08069866895675659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,128,1,float16,float16,0,0.055386667450269066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,0,1,float16,float16,0,0.05533333122730255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,128,1,float16,fp8,0,0.054378668467203774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,128,1,fp8,fp8,0,0.052229334910710655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,0,1,float16,fp8,0,0.05420800050099691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,0,1,fp8,fp8,0,0.05218133330345154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,0,1,float16,float16,0,0.0535093347231547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,128,1,float16,float16,0,0.05382933219273885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,128,1,float16,fp8,0,0.054229333996772766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,128,1,fp8,fp8,0,0.05070933202902476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,0,1,float16,fp8,0,0.05338666836420695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,0,1,fp8,fp8,0,0.05060799916585287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,128,1,float16,float16,0,0.05407999952634176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,0,1,float16,float16,0,0.053904001911481224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,128,1,float16,fp8,0,0.054229333996772766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,128,1,fp8,fp8,0,0.049914668003718056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,0,1,float16,fp8,0,0.053802669048309326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,0,1,fp8,fp8,0,0.04974400003751119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,128,1,float16,float16,0,0.053616002202034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,0,1,float16,float16,0,0.05431999762852987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,128,1,float16,fp8,0,0.05231999854246775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,128,1,fp8,fp8,0,0.05027199784914652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,0,1,float16,fp8,0,0.05397333204746246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,0,1,fp8,fp8,0,0.04980800052483877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,128,1,float16,float16,0,0.03555733213822047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,0,1,float16,float16,0,0.035162667433420815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,128,1,float16,fp8,0,0.0353973334034284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,128,1,fp8,fp8,0,0.03555200000603994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,0,1,float16,fp8,0,0.035386666655540466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,0,1,fp8,fp8,0,0.035216001172860466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,128,1,float16,float16,0,0.03562666724125544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,0,1,float16,float16,0,0.03557866563399633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,128,1,float16,fp8,0,0.03550933301448822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,128,1,fp8,fp8,0,0.03381866713364919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,0,1,float16,fp8,0,0.035936000446478523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,0,1,fp8,fp8,0,0.03196266790231069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,128,1,float16,float16,0,0.03509333233038584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,0,1,float16,float16,0,0.0354666660229365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,128,1,float16,fp8,0,0.033701332906881966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,128,1,fp8,fp8,0,0.033301333586374916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,0,1,float16,fp8,0,0.035349334279696144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,0,1,fp8,fp8,0,0.03355200091997782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,128,1,float16,float16,0,0.033946665624777474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,0,1,float16,float16,0,0.03383466601371765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,128,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,128,1,fp8,fp8,0,0.03349866718053818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,0,1,float16,fp8,0,0.033573334415753685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,0,1,fp8,fp8,0,0.033786666889985405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,128,1,float16,float16,0,0.023775999744733173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,0,1,float16,float16,0,0.023408000667889912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,128,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,128,1,fp8,fp8,0,0.02327999969323476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,0,1,float16,fp8,0,0.023311999936898548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,0,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,128,1,float16,float16,0,0.02345066765944163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,0,1,float16,float16,0,0.023743999501069386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,128,1,float16,fp8,0,0.02310933421055476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,128,1,fp8,fp8,0,0.022997332115968067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,0,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,0,1,fp8,fp8,0,0.0222080002228419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,128,1,float16,float16,0,0.022954667607943218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,0,1,float16,float16,0,0.023728000621000927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,128,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,128,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,0,1,float16,fp8,0,0.025242666403452556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,0,1,fp8,fp8,0,0.023599999646345775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,128,1,float16,float16,0,0.024351999163627625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,0,1,float16,float16,0,0.02346666653951009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,128,1,float16,fp8,0,0.024608001112937927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,128,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,0,1,float16,fp8,0,0.023333333432674408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,0,1,fp8,fp8,0,0.02348800003528595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,128,1,float16,float16,0,0.02120000123977661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,0,1,float16,float16,0,0.02125866711139679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,128,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,128,1,fp8,fp8,0,0.01952533299724261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,0,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,128,1,float16,float16,0,0.019152000546455383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,0,1,float16,float16,0,0.01942933350801468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,128,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,128,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,128,1,float16,float16,0,0.020410666863123577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,0,1,float16,float16,0,0.01945066700379054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,128,1,float16,fp8,0,0.01971199984351794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,0,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,0,1,fp8,fp8,0,0.02067199970285098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,128,1,fp8,fp8,0,0.021856000026067097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,128,1,float16,float16,0,0.019440000255902607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,0,1,float16,float16,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,128,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,128,1,fp8,fp8,0,0.019946667055288952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,0,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,128,1,float16,float16,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,0,1,fp8,fp8,0,0.02165866643190384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,128,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,128,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,128,1,float16,float16,0,0.01681600014368693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,128,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,128,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,0,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,0,1,float16,float16,0,0.016149333367745083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,128,1,fp8,fp8,0,0.01621866722901662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,128,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,0,1,fp8,fp8,0,0.01803733284274737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,128,1,float16,float16,0,0.01775466650724411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,128,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,0,1,float16,float16,0,0.01553600033124288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,128,1,float16,float16,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,128,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,128,1,float16,float16,0,0.01670933390657107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,128,1,float16,fp8,0,0.017477333545684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,0,1,float16,fp8,0,0.016693333784739178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,0,1,fp8,fp8,0,0.01605333387851715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,128,1,float16,float16,0,0.01681600014368693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,128,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,0,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,128,1,float16,float16,0,0.01533866673707962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,128,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,128,1,fp8,fp8,0,0.01617066686352094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,0,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,128,1,float16,float16,0,0.26733867327372235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,0,1,float16,float16,0,0.26659733057022095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,128,1,float16,fp8,0,0.26578666766484577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,128,1,fp8,fp8,0,0.24217599630355835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,0,1,float16,fp8,0,0.2666880091031392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,0,1,fp8,fp8,0,0.24041066567103067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,128,1,float16,float16,0,0.26682666937510174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,0,1,float16,float16,0,0.2672906716664632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,128,1,float16,fp8,0,0.26765867074330646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,128,1,fp8,fp8,0,0.2407146692276001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,0,1,float16,fp8,0,0.2664053241411845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,0,1,fp8,fp8,0,0.2415999968846639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,128,1,float16,float16,0,0.2674880027770996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,0,1,float16,float16,0,0.26714134216308594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,128,1,float16,fp8,0,0.26708267132441205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,128,1,fp8,fp8,0,0.24195200204849243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,0,1,float16,fp8,0,0.26636266708374023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,0,1,fp8,fp8,0,0.24241065979003906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,128,1,float16,float16,0,0.14239466190338135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,0,1,float16,float16,0,0.1420693298180898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,128,1,float16,fp8,0,0.14018133282661438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,128,1,fp8,fp8,0,0.13291733463605246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,0,1,fp8,fp8,0,0.1320373316605886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,0,1,float16,fp8,0,0.14084266622861227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,128,1,float16,float16,0,0.13929067055384317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,0,1,float16,float16,0,0.13934399684270224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,128,1,float16,fp8,0,0.14069867134094238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,0,1,float16,fp8,0,0.13986133535703024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,128,1,fp8,fp8,0,0.12844799955685934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,0,1,fp8,fp8,0,0.12820266683896384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,128,1,float16,float16,0,0.14013866583506265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,0,1,float16,float16,0,0.13992533087730408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,128,1,fp8,fp8,0,0.1297813355922699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,128,1,float16,fp8,0,0.139082670211792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,0,1,float16,fp8,0,0.14011200269063315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,0,1,fp8,fp8,0,0.13008000453313193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,128,1,float16,float16,0,0.1402666668097178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,128,1,float16,fp8,0,0.14030399918556213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,0,1,float16,float16,0,0.13954666256904602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,128,1,fp8,fp8,0,0.12980799873669943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,0,1,float16,fp8,0,0.1404906709988912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,0,1,fp8,fp8,0,0.12819733222325644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,128,1,float16,float16,0,0.08083199958006541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,0,1,float16,float16,0,0.07905599971612294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,128,1,float16,fp8,0,0.08060800035794576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,128,1,fp8,fp8,0,0.07456533114115398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,0,1,float16,fp8,0,0.08082666496435802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,0,1,fp8,fp8,0,0.07524266839027405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,128,1,float16,float16,0,0.07865599791208903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,0,1,float16,float16,0,0.07906133433183034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,128,1,float16,fp8,0,0.07866133252779643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,128,1,fp8,fp8,0,0.07362666726112366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,0,1,float16,fp8,0,0.07946133116881053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,0,1,fp8,fp8,0,0.07387199997901917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,128,1,float16,float16,0,0.07881066699822743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,0,1,float16,float16,0,0.07865066826343536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,128,1,float16,fp8,0,0.07867200175921123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,128,1,fp8,fp8,0,0.07434133191903432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,0,1,float16,fp8,0,0.07913599908351898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,0,1,fp8,fp8,0,0.07418133318424225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,128,1,float16,float16,0,0.07885333398977916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,0,1,float16,float16,0,0.07842133442560832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,128,1,float16,fp8,0,0.08028266827265422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,128,1,fp8,fp8,0,0.07318933308124542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,0,1,float16,fp8,0,0.07876800000667572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,0,1,fp8,fp8,0,0.07483200232187907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,128,1,float16,float16,0,0.04825599988301595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,0,1,float16,float16,0,0.047930667797724404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,128,1,float16,fp8,0,0.0479360024134318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,128,1,fp8,fp8,0,0.046282668908437095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,0,1,float16,fp8,0,0.04781333108743032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,0,1,fp8,fp8,0,0.04576000074545542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,128,1,float16,float16,0,0.0479360024134318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,0,1,float16,float16,0,0.04577599962552389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,128,1,float16,fp8,0,0.04747733473777771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,128,1,fp8,fp8,0,0.04586666822433472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,0,1,float16,fp8,0,0.04573333263397217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,0,1,fp8,fp8,0,0.04567466676235199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,128,1,float16,float16,0,0.046629334489504494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,0,1,float16,float16,0,0.047584002216657005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,128,1,float16,fp8,0,0.04791999856630961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,128,1,fp8,fp8,0,0.04401599864164988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,0,1,float16,fp8,0,0.0476746658484141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,0,1,fp8,fp8,0,0.0440586656332016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,128,1,float16,float16,0,0.047194664676984154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,0,1,float16,float16,0,0.047695999344189964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,128,1,float16,fp8,0,0.048010667165120445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,128,1,fp8,fp8,0,0.04419200122356415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,0,1,float16,fp8,0,0.04765866696834564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,0,1,fp8,fp8,0,0.043935999274253845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,128,1,float16,float16,0,0.031311998764673867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,0,1,float16,float16,0,0.029605334003766377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,128,1,float16,fp8,0,0.03149333347876867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,128,1,fp8,fp8,0,0.030576000610987347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,0,1,float16,fp8,0,0.03121600051720937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,0,1,fp8,fp8,0,0.031514666974544525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,128,1,float16,float16,0,0.031386665999889374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,0,1,float16,float16,0,0.03142400085926056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,128,1,float16,fp8,0,0.03159466634194056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,128,1,fp8,fp8,0,0.029738667110602062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,0,1,float16,fp8,0,0.031370667119820915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,0,1,fp8,fp8,0,0.029205332199732464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,128,1,float16,float16,0,0.0314026673634847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,0,1,float16,float16,0,0.029322666426499683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,128,1,float16,fp8,0,0.03136533250411352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,128,1,fp8,fp8,0,0.02972800036271413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,0,1,float16,fp8,0,0.029978667696317036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,0,1,fp8,fp8,0,0.029552000264326733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,128,1,float16,float16,0,0.03081600119670232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,0,1,float16,float16,0,0.02957333376010259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,128,1,float16,fp8,0,0.03170666595300039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,128,1,fp8,fp8,0,0.029253333806991577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,0,1,float16,fp8,0,0.030645333230495453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,128,1,float16,float16,0,0.02146133283774058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,0,1,fp8,fp8,0,0.029050665597120922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,0,1,float16,float16,0,0.023178666830062866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,128,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,128,1,fp8,fp8,0,0.02163733293612798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,0,1,float16,fp8,0,0.023183998962243397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,0,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,128,1,float16,float16,0,0.021456000705560047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,0,1,float16,float16,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,128,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,128,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,0,1,float16,fp8,0,0.02145066608985265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,128,1,float16,float16,0,0.023168000082174938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,0,1,float16,float16,0,0.023157333334287006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,128,1,float16,fp8,0,0.0216799999276797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,128,1,fp8,fp8,0,0.023242667317390442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,0,1,float16,fp8,0,0.022986667851607006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,0,1,fp8,fp8,0,0.02279466638962428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,128,1,float16,float16,0,0.023130667706330616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,0,1,float16,float16,0,0.023141334454218548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,128,1,float16,fp8,0,0.02330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,128,1,fp8,fp8,0,0.02327466756105423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,0,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,0,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,128,1,float16,float16,0,0.018917333334684372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,128,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,128,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,0,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,0,1,float16,float16,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,128,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,128,1,float16,float16,0,0.019573333362738293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,128,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,0,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,0,1,fp8,fp8,0,0.01969066634774208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,128,1,float16,float16,0,0.019343999524911244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,128,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,128,1,float16,fp8,0,0.02053333322207133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,0,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,128,1,float16,float16,0,0.01978133370478948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,128,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,128,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,0,1,float16,fp8,0,0.021488000949223835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,0,1,fp8,fp8,0,0.019487999379634857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,128,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,0,1,float16,float16,0,0.016127999871969223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,0,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,128,1,float16,float16,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,128,1,float16,float16,0,0.016677333662907284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,0,1,float16,float16,0,0.01600533351302147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,128,1,fp8,fp8,0,0.01756799966096878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,0,1,float16,float16,0,0.01578666642308235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,128,1,float16,float16,0,0.017477333545684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,128,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,0,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,128,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,128,1,float16,fp8,0,0.015919999529918034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,128,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,0,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,0,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,128,1,float16,float16,0,0.016735999534527462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,128,1,float16,fp8,0,0.016410666207472484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,64,128,1,float16,float16,0,0.22457067171732584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,64,0,1,float16,float16,0,0.22643733024597168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,64,128,1,float16,fp8,0,0.2248799999554952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,64,128,1,fp8,fp8,0,0.20586133003234863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,64,0,1,float16,fp8,0,0.2247520089149475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,64,0,1,fp8,fp8,0,0.2057653268178304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,64,128,1,float16,float16,0,0.22509332497914633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,64,0,1,float16,float16,0,0.22467732429504395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,64,128,1,float16,fp8,0,0.22643733024597168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,64,128,1,fp8,fp8,0,0.2057653268178304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,64,0,1,float16,fp8,0,0.22629332542419434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,64,0,1,fp8,fp8,0,0.20598934094111124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,64,128,1,float16,float16,0,0.22633065780003866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,64,0,1,float16,float16,0,0.22606933116912842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,64,128,1,float16,fp8,0,0.22639467318852743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,64,128,1,fp8,fp8,0,0.2059040069580078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,64,0,1,float16,fp8,0,0.22613867123921713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,64,0,1,fp8,fp8,0,0.20551466941833496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,0,0.12165333827336629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,64,128,1,float16,fp8,0,0.12158399820327759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,64,128,1,float16,float16,0,0.12057600418726604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,64,128,1,fp8,fp8,0,0.11134399970372517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,0,0.1200213332970937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,64,0,1,fp8,fp8,0,0.11125333110491435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,64,0,1,float16,float16,0,0.11955733100573222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,64,128,1,float16,float16,0,0.12043733398119609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,64,128,1,float16,fp8,0,0.1197760005791982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,64,128,1,fp8,fp8,0,0.10959466298421223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,64,0,1,float16,fp8,0,0.11987732847531636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,64,0,1,fp8,fp8,0,0.11070932944615682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,64,128,1,float16,float16,0,0.11969600121180217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,64,0,1,float16,float16,0,0.11974933743476868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,64,128,1,float16,fp8,0,0.12187199791272481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,64,128,1,fp8,fp8,0,0.11195733149846394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,64,0,1,fp8,fp8,0,0.10947733124097188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,64,0,1,float16,fp8,0,0.1199626624584198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,64,128,1,float16,float16,0,0.12171733379364014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,64,128,1,float16,fp8,0,0.12153599659601848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,64,0,1,float16,float16,0,0.12144000331560771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,64,128,1,fp8,fp8,0,0.11141332983970642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,64,0,1,float16,fp8,0,0.12095999717712402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,64,0,1,fp8,fp8,0,0.111135999361674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,64,128,1,float16,float16,0,0.0685280015071233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,0,0.06845333178838094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,64,128,1,float16,fp8,0,0.06861866513888042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,64,128,1,fp8,fp8,0,0.06412266691525777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,0,0.06840000053246816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,64,0,1,fp8,fp8,0,0.06435200075308482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,64,128,1,float16,float16,0,0.06832533578077953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,64,0,1,float16,float16,0,0.06728533407052358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,64,128,1,float16,fp8,0,0.06866666674613953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,64,128,1,fp8,fp8,0,0.06402666866779327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,64,0,1,float16,fp8,0,0.06821333368619283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,64,0,1,fp8,fp8,0,0.06431999802589417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,64,128,1,float16,float16,0,0.06863466898600261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,64,0,1,float16,float16,0,0.0682826687892278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,64,128,1,float16,fp8,0,0.06820799907048543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,64,128,1,fp8,fp8,0,0.06417066852251689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,64,0,1,float16,fp8,0,0.06826133529345195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,64,0,1,fp8,fp8,0,0.06379200021425883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,64,128,1,float16,float16,0,0.06881066660086314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,64,0,1,float16,float16,0,0.06685866912206014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,64,128,1,float16,fp8,0,0.06832533578077953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,64,128,1,fp8,fp8,0,0.06226666768391927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,64,128,1,float16,float16,0,0.0417546679576238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,64,0,1,float16,fp8,0,0.06810666620731354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,64,0,1,fp8,fp8,0,0.06267199913660686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,0,0.04196799794832865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,64,128,1,float16,fp8,0,0.041946664452552795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,64,128,1,fp8,fp8,0,0.03967999915281931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,0,0.04189866781234741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,64,0,1,fp8,fp8,0,0.039647998909155525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,64,128,1,float16,float16,0,0.042709335684776306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,64,0,1,float16,float16,0,0.04204800228277842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,64,128,1,float16,fp8,0,0.04208533465862274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,64,128,1,fp8,fp8,0,0.039962666730086006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,64,0,1,float16,fp8,0,0.04371733466784159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,64,0,1,fp8,fp8,0,0.039621333281199135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,64,128,1,float16,float16,0,0.043477331598599754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,64,0,1,float16,float16,0,0.04207466542720795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,64,128,1,float16,fp8,0,0.04165333261092504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,64,128,1,fp8,fp8,0,0.039813332259655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,64,0,1,float16,fp8,0,0.04377066592375437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,64,0,1,fp8,fp8,0,0.039647998909155525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,64,128,1,float16,float16,0,0.04153066625197729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,64,0,1,float16,float16,0,0.04200000067551931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,64,128,1,float16,fp8,0,0.04187199970086416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,64,128,1,fp8,fp8,0,0.039877332746982574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,64,128,1,float16,float16,0,0.029722665747006733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,64,0,1,float16,fp8,0,0.041797334949175514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,64,0,1,fp8,fp8,0,0.03978666663169861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,0,0.02939733366171519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,64,128,1,float16,fp8,0,0.029626667499542236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,64,128,1,fp8,fp8,0,0.027237333357334137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,0,0.02926933268706004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,64,0,1,fp8,fp8,0,0.02743999908367793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,64,128,1,float16,float16,0,0.029504001140594482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,64,0,1,float16,float16,0,0.029418667157491047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,64,128,1,float16,fp8,0,0.029504001140594482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,64,128,1,fp8,fp8,0,0.027429332335789997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,64,0,1,float16,fp8,0,0.029418667157491047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,64,0,1,fp8,fp8,0,0.02716800073782603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,64,128,1,float16,float16,0,0.029258665939172108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,64,0,1,float16,float16,0,0.029274667302767437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,64,128,1,float16,fp8,0,0.02951466788848241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,64,128,1,fp8,fp8,0,0.027642667293548584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,64,0,1,float16,fp8,0,0.029322666426499683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,64,0,1,fp8,fp8,0,0.027301333844661713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,64,128,1,float16,float16,0,0.029530666768550873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,64,0,1,float16,float16,0,0.029504001140594482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,64,128,1,float16,fp8,0,0.029274667302767437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,64,128,1,fp8,fp8,0,0.027690666417280834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,64,0,1,float16,fp8,0,0.029504001140594482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,64,0,1,fp8,fp8,0,0.02735466758410136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,64,128,1,float16,float16,0,0.02139200021823247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,0,0.021386665602525074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,64,128,1,fp8,fp8,0,0.020938667158285778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,64,128,1,float16,fp8,0,0.023567999402681988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,64,0,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,64,128,1,float16,float16,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,64,0,1,float16,float16,0,0.02313599983851115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,64,128,1,float16,fp8,0,0.02369600037733714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,64,128,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,64,0,1,float16,fp8,0,0.023056000471115112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,64,0,1,fp8,fp8,0,0.021407999098300934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,64,128,1,float16,float16,0,0.022357332209746044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,64,0,1,float16,float16,0,0.02149333308140437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,64,128,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,64,128,1,fp8,fp8,0,0.02107200026512146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,64,0,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,64,128,1,float16,float16,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,64,0,1,float16,fp8,0,0.021733333667119343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,64,0,1,float16,float16,0,0.021295999487241108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,64,128,1,float16,fp8,0,0.021530665457248688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,64,128,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,64,0,1,float16,fp8,0,0.021664001047611237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,64,0,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,64,128,1,float16,float16,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,0,0.019402666638294857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,64,128,1,float16,fp8,0,0.018021332720915478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,64,128,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,0,0.019589333484570186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,64,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,64,128,1,float16,float16,0,0.019018666197856266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,64,0,1,float16,float16,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,64,128,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,64,128,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,64,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,64,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,64,128,1,float16,float16,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,64,128,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,64,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,64,128,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,64,0,1,float16,fp8,0,0.019466667125622433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,64,0,1,fp8,fp8,0,0.018672000616788864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,64,128,1,float16,float16,0,0.019381333142518997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,64,128,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,64,128,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,64,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,64,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,64,128,1,float16,float16,0,0.015125333021084467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,64,128,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,64,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,64,128,1,float16,float16,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,64,128,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,64,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,64,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,64,128,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,64,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,64,128,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,64,0,1,fp8,fp8,0,0.01534933348496755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,64,128,1,float16,float16,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,64,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,64,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,64,128,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,64,0,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,64,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,64,0,1,fp8,fp8,0,0.016751999656359356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,64,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,64,128,1,float16,float16,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,64,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,64,0,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,64,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,128,1,float16,float16,0,2.2282026608784995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,128,1,float16,fp8,0,2.245962619781494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,128,1,fp8,fp8,0,2.0583680470784507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,128,1,float16,float16,0,2.2446346282958984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,0,1,float16,float16,0,13.220315297444662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,0,1,fp8,fp8,0,12.278096516927084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,128,1,float16,fp8,0,2.2617440223693848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,0,1,float16,fp8,0,13.490005493164062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,128,1,fp8,fp8,0,2.0795253117879233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,128,1,float16,float16,0,2.2795519828796387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,0,1,float16,float16,0,13.821818033854166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,128,1,float16,fp8,0,2.299935976664225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,128,1,fp8,fp8,0,2.121861298878988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,0,1,fp8,fp8,0,12.297808329264322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,0,1,float16,fp8,0,13.391007741292318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,128,1,float16,float16,0,1.3073546886444092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,0,1,float16,float16,0,14.19863510131836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,128,1,float16,fp8,0,1.334671974182129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,128,1,fp8,fp8,0,1.252895991007487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,0,1,float16,float16,0,6.959290822347005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,0,1,fp8,fp8,0,12.340794881184896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,0,1,float16,fp8,0,14.311381022135416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,128,1,float16,float16,0,1.1607733567555745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,128,1,float16,fp8,0,1.1712106863657634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,0,1,fp8,fp8,0,6.414821624755859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,0,1,float16,fp8,0,7.502330780029297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,128,1,fp8,fp8,0,1.0731039841969807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,0,1,float16,float16,0,7.6301225026448565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,128,1,float16,float16,0,1.168613354365031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,128,1,float16,fp8,0,1.1791679859161377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,0,1,float16,fp8,0,7.086549123128255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,0,1,fp8,fp8,0,6.231557210286458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,128,1,fp8,fp8,0,1.0823413530985515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,128,1,float16,float16,0,1.1818186442057292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,0,1,float16,float16,0,7.078671773274739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,0,1,fp8,fp8,0,6.241343816121419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,128,1,float16,fp8,0,1.195199966430664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,0,1,float16,fp8,0,7.531824111938477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,128,1,fp8,fp8,0,1.1028640270233154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,128,1,float16,float16,0,0.7180426915486654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,0,1,float16,float16,0,6.740175882975261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,128,1,float16,fp8,0,0.735088030497233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,128,1,fp8,fp8,0,0.6954560279846191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,0,1,float16,float16,0,3.565066655476888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,0,1,float16,fp8,0,7.008064270019531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,0,1,fp8,fp8,0,6.259194691975911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,128,1,float16,float16,0,0.6986292997996012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,128,1,float16,fp8,0,0.6545333464940389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,0,1,float16,fp8,0,3.583002726236979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,0,1,fp8,fp8,0,3.32478396097819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,128,1,fp8,fp8,0,0.6077866554260254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,0,1,float16,float16,0,3.683151880900065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,128,1,float16,float16,0,0.6523679892222086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,128,1,float16,fp8,0,0.6585226853688558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,0,1,fp8,fp8,0,3.2346506118774414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,128,1,fp8,fp8,0,0.6115626494089762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,0,1,float16,fp8,0,3.5952908198038735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,128,1,float16,float16,0,0.6593813498814901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,0,1,float16,float16,0,3.6956799825032554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,128,1,float16,fp8,0,0.6971306800842285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,0,1,fp8,fp8,0,3.2376906077067056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,0,1,float16,fp8,0,3.4878241221110025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,128,1,fp8,fp8,0,0.6214666763941447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,128,1,float16,float16,0,0.5027733246485392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,0,1,float16,float16,0,3.5159308115641275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,128,1,float16,fp8,0,0.5034026702245077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,128,1,fp8,fp8,0,0.47226667404174805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,0,1,fp8,fp8,0,3.2501598993937173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,0,1,float16,float16,0,1.9789759318033855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,0,1,float16,fp8,0,3.519887924194336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,128,1,float16,float16,0,0.5024799903233846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,128,1,float16,fp8,0,0.5007359981536865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,0,1,float16,fp8,0,1.978885332743327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,0,1,fp8,fp8,0,1.8370985984802246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,0,1,float16,float16,0,1.9684425989786785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,128,1,fp8,fp8,0,0.46964800357818604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,128,1,float16,float16,0,0.5010720094045004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,0,1,float16,fp8,0,1.9696319897969563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,128,1,float16,fp8,0,0.5006239811579386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,0,1,fp8,fp8,0,1.8367573420206706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,0,1,float16,float16,0,1.9712212880452473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,128,1,fp8,fp8,0,0.4719359874725342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,128,1,float16,float16,0,0.5004640022913615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,0,1,fp8,fp8,0,1.8364799817403157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,0,1,float16,fp8,0,1.9704000155131023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,128,1,float16,fp8,0,0.5002986590067545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,0,1,float16,float16,0,1.970117410024007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,128,1,fp8,fp8,0,0.4727199872334798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,0,1,fp8,fp8,0,1.8352905909220378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,0,1,float16,fp8,0,1.9730933507283528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,128,1,float16,float16,0,1.654538631439209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,128,1,fp8,fp8,0,1.5257226626078289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,128,1,float16,fp8,0,1.6682772636413574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,128,1,float16,float16,0,1.6646560033162434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,0,1,float16,float16,0,8.34104029337565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,0,1,float16,fp8,0,7.910559972127278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,0,1,fp8,fp8,0,7.242511749267578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,128,1,float16,fp8,0,1.6880639394124348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,128,1,fp8,fp8,0,1.5412373542785645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,0,1,float16,float16,0,7.814730962117513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,128,1,float16,float16,0,1.6909972826639812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,128,1,float16,fp8,0,1.7070933977762859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,0,1,fp8,fp8,0,7.256095886230469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,128,1,fp8,fp8,0,1.572938601175944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,0,1,float16,fp8,0,8.110394795735678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,128,1,float16,float16,0,0.9814666906992594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,0,1,float16,float16,0,7.854165395100911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,128,1,float16,fp8,0,1.0046506722768147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,128,1,fp8,fp8,0,0.9409493605295817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,0,1,float16,float16,0,4.315381368001302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,0,1,fp8,fp8,0,7.28877321879069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,0,1,float16,fp8,0,8.441882451375326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,128,1,float16,float16,0,0.8734506766001383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,128,1,float16,fp8,0,0.8814720312754313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,0,1,float16,fp8,0,4.153653462727864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,128,1,fp8,fp8,0,0.8094186782836914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,0,1,fp8,fp8,0,3.837029457092285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,128,1,float16,float16,0,0.8777653376261393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,0,1,float16,float16,0,4.102352142333984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,128,1,float16,fp8,0,0.8872266610463461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,0,1,fp8,fp8,0,3.7032801310221353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,0,1,float16,fp8,0,4.02016003926595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,128,1,fp8,fp8,0,0.816309372584025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,128,1,float16,float16,0,0.8900907039642334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,0,1,float16,float16,0,4.186154683430989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,0,1,fp8,fp8,0,3.711146672566732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,0,1,float16,fp8,0,4.0007680257161455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,128,1,float16,fp8,0,0.8996853033701578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,128,1,fp8,fp8,0,0.8303146362304688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,0,1,float16,float16,0,4.116239865620931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,128,1,float16,float16,0,0.5570613145828247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,0,1,fp8,fp8,0,3.7244532903035483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,0,1,float16,fp8,0,4.0226240158081055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,0,1,float16,float16,0,2.1759467124938965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,128,1,float16,fp8,0,0.5577280124028524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,128,1,fp8,fp8,0,0.527509331703186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,128,1,float16,float16,0,0.49464531739552814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,128,1,float16,fp8,0,0.49821333090464276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,0,1,float16,fp8,0,2.168794631958008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,0,1,fp8,fp8,0,2.0104799270629883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,128,1,fp8,fp8,0,0.4618399937947591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,0,1,float16,float16,0,2.0867199897766113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,128,1,float16,float16,0,0.4968159993489583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,128,1,float16,fp8,0,0.5005280176798502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,0,1,fp8,fp8,0,1.944698651631673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,0,1,float16,fp8,0,2.093653361002604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,128,1,fp8,fp8,0,0.4650133450826009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,0,1,float16,float16,0,2.0929013888041177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,128,1,float16,float16,0,0.5023306608200073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,128,1,float16,fp8,0,0.5071413516998291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,0,1,fp8,fp8,0,1.9494986534118652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,0,1,float16,fp8,0,2.0981012980143228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,128,1,fp8,fp8,0,0.4717866579691569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,0,1,float16,float16,0,2.100192070007324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,128,1,float16,float16,0,0.38011733690897626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,128,1,float16,fp8,0,0.3800640106201172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,0,1,float16,float16,0,1.2283573150634766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,0,1,fp8,fp8,0,1.9554613431294758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,0,1,float16,fp8,0,2.1071413358052573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,128,1,fp8,fp8,0,0.35755733648935956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,128,1,float16,float16,0,0.38186665376027423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,0,1,fp8,fp8,0,1.1385760307312012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,0,1,float16,fp8,0,1.2282133102416992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,128,1,float16,fp8,0,0.3822186787923177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,0,1,float16,float16,0,1.2170720100402832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,128,1,fp8,fp8,0,0.3574666579564412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,128,1,float16,float16,0,0.37910401821136475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,0,1,fp8,fp8,0,1.138266642888387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,0,1,float16,fp8,0,1.219541311264038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,128,1,float16,fp8,0,0.37965333461761475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,0,1,float16,float16,0,1.2219040393829346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,128,1,fp8,fp8,0,0.35951467355092365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,128,1,float16,float16,0,0.382426659266154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,0,1,float16,fp8,0,1.2223626772562664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,0,1,fp8,fp8,0,1.136624018351237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,128,1,float16,fp8,0,0.38283201058705646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,0,1,float16,float16,0,1.2227413654327393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,128,1,fp8,fp8,0,0.3601919809977214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,0,1,float16,fp8,0,1.2227520147959392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,0,1,fp8,fp8,0,1.137237310409546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,128,1,float16,float16,0,1.3753867149353027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,128,1,float16,fp8,0,1.386298656463623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,128,1,fp8,fp8,0,1.2663733164469402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,128,1,float16,float16,0,1.384709358215332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,0,1,float16,float16,0,5.689589182535808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,0,1,float16,fp8,0,5.6317494710286455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,128,1,float16,fp8,0,1.399295965830485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,0,1,fp8,fp8,0,5.212634722391765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,128,1,fp8,fp8,0,1.278544028600057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,0,1,float16,float16,0,5.894128163655599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,128,1,float16,float16,0,1.4053066571553547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,128,1,float16,fp8,0,1.4198452631632488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,0,1,fp8,fp8,0,5.225541432698567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,0,1,float16,fp8,0,5.721551895141602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,128,1,fp8,fp8,0,1.3041866620381672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,128,1,float16,float16,0,0.8200639883677164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,0,1,float16,float16,0,5.799519856770833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,128,1,float16,fp8,0,0.8483786582946777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,0,1,float16,float16,0,2.9967734018961587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,128,1,fp8,fp8,0,0.7865760326385498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,0,1,fp8,fp8,0,5.251466751098633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,0,1,float16,fp8,0,5.895711898803711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,128,1,float16,float16,0,0.7323093414306641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,128,1,float16,fp8,0,0.7385706901550293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,0,1,float16,fp8,0,3.1705760955810547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,0,1,fp8,fp8,0,2.794362703959147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,128,1,fp8,fp8,0,0.6773386796315511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,0,1,float16,float16,0,2.9881280263264975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,128,1,float16,float16,0,0.7352586587270101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,128,1,float16,fp8,0,0.7424906889597574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,0,1,float16,fp8,0,2.8926401138305664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,0,1,fp8,fp8,0,2.6819626490275064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,128,1,fp8,fp8,0,0.6828853289286295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,0,1,float16,float16,0,3.0028158823649087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,128,1,float16,float16,0,0.7454559803009033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,128,1,float16,fp8,0,0.7547893524169922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,0,1,fp8,fp8,0,2.6865866978963218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,0,1,float16,fp8,0,2.90118408203125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,128,1,fp8,fp8,0,0.6952533721923828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,128,1,float16,float16,0,0.4559733470280965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,0,1,float16,float16,0,3.038895924886068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,128,1,float16,fp8,0,0.47772268454233807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,0,1,float16,float16,0,1.574677308400472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,128,1,fp8,fp8,0,0.44882134596506756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,0,1,fp8,fp8,0,2.6984052658081055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,0,1,float16,fp8,0,3.005525271097819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,128,1,float16,float16,0,0.41885332266489667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,0,1,float16,fp8,0,1.5867625872294109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,0,1,fp8,fp8,0,1.4746026992797852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,128,1,float16,fp8,0,0.4182240168253581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,128,1,fp8,fp8,0,0.38838934898376465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,0,1,float16,float16,0,1.524288018544515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,128,1,float16,float16,0,0.4171893199284871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,0,1,float16,fp8,0,1.525162696838379
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,0,1,fp8,fp8,0,1.4212160110473633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,128,1,float16,fp8,0,0.42043201128641766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,128,1,fp8,fp8,0,0.39020800590515137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,0,1,float16,float16,0,1.5273280143737793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,128,1,float16,float16,0,0.4211733341217041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,0,1,fp8,fp8,0,1.4220587412516277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,0,1,float16,fp8,0,1.5309707323710124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,128,1,float16,fp8,0,0.4256853262583415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,128,1,fp8,fp8,0,0.3965173165003459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,0,1,float16,float16,0,1.534272034962972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,128,1,float16,float16,0,0.32204266389211017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,128,1,float16,fp8,0,0.32332799832026166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,0,1,float16,float16,0,0.9178880055745443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,0,1,fp8,fp8,0,1.4287625948588054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,128,1,fp8,fp8,0,0.30402666330337524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,0,1,float16,fp8,0,1.5385653177897136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,128,1,float16,float16,0,0.3205066720644633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,0,1,fp8,fp8,0,0.8506613572438558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,0,1,float16,fp8,0,0.9182133674621582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,128,1,float16,fp8,0,0.3203306595484416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,0,1,float16,float16,0,0.9091946283976237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,128,1,fp8,fp8,0,0.30208534002304077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,128,1,float16,float16,0,0.32097067435582477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,0,1,fp8,fp8,0,0.8475573062896729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,0,1,float16,fp8,0,0.910912036895752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,128,1,float16,fp8,0,0.3208693265914917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,0,1,float16,float16,0,0.9107306798299154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,128,1,fp8,fp8,0,0.301749328772227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,128,1,float16,float16,0,0.3222506642341614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,0,1,float16,fp8,0,0.912938674290975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,0,1,fp8,fp8,0,0.8465706507364908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,128,1,float16,fp8,0,0.32105066378911334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,0,1,float16,float16,0,0.9121387004852295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,128,1,fp8,fp8,0,0.30180267492930096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,0,1,float16,fp8,0,0.9092640082041422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,0,1,fp8,fp8,0,0.8485386371612549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,128,1,float16,float16,0,2.1688267389933267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,128,1,float16,fp8,0,2.187664031982422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,128,1,fp8,fp8,0,1.997754732767741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,128,1,float16,float16,0,2.1825013160705566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,0,1,float16,float16,0,7.911680221557617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,0,1,fp8,fp8,0,6.951152165730794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,0,1,float16,fp8,0,7.5370133717854815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,128,1,float16,fp8,0,2.199615955352783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,128,1,fp8,fp8,0,2.0175840059916177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,0,1,float16,float16,0,7.527151743570964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,128,1,float16,float16,0,2.221322695414225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,128,1,float16,fp8,0,2.2381919225056968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,0,1,fp8,fp8,0,6.971408208211263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,0,1,float16,fp8,0,7.625573476155599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,128,1,fp8,fp8,0,2.058506647745768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,0,1,float16,float16,0,7.583536148071289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,128,1,float16,float16,0,1.2510026295979817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,128,1,float16,fp8,0,1.2765599886576335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,128,1,fp8,fp8,0,1.1927573680877686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,0,1,fp8,fp8,0,7.013050715128581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,0,1,float16,float16,0,3.9755894343058267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,0,1,float16,fp8,0,7.8995412190755205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,128,1,float16,float16,0,1.1018400192260742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,128,1,float16,fp8,0,1.1116586526234944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,0,1,float16,fp8,0,4.136970520019531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,0,1,fp8,fp8,0,3.6984427769978843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,128,1,fp8,fp8,0,1.0163520177205403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,0,1,float16,float16,0,3.9740587870279946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,128,1,float16,float16,0,1.110368013381958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,128,1,float16,fp8,0,1.1207359631856282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,0,1,float16,fp8,0,3.8635520935058594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,0,1,fp8,fp8,0,3.516874631245931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,128,1,fp8,fp8,0,1.0245920022328694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,0,1,float16,float16,0,4.095871925354004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,128,1,float16,float16,0,1.124346653620402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,128,1,float16,fp8,0,1.1361599763234456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,0,1,fp8,fp8,0,3.525690714518229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,0,1,float16,fp8,0,3.881850560506185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,128,1,fp8,fp8,0,1.0427467028299968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,128,1,float16,float16,0,0.6585653225580851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,0,1,float16,float16,0,4.241434733072917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,128,1,float16,fp8,0,0.6747732957204183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,0,1,fp8,fp8,0,3.5450239181518555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,0,1,float16,fp8,0,3.883114814758301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,0,1,float16,float16,0,2.04639466603597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,128,1,fp8,fp8,0,0.6327146689097086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,128,1,float16,float16,0,0.5876906712849935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,128,1,float16,fp8,0,0.5928213198979696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,0,1,float16,fp8,0,2.0615359942118325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,0,1,fp8,fp8,0,1.9087573687235515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,0,1,float16,float16,0,1.960879961649577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,128,1,fp8,fp8,0,0.5458026727040609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,128,1,float16,float16,0,0.590720017751058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,128,1,float16,fp8,0,0.5965386629104614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,0,1,fp8,fp8,0,1.820298671722412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,0,1,float16,fp8,0,1.9656640688578289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,0,1,float16,float16,0,1.9674399693806965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,128,1,fp8,fp8,0,0.5508319934209188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,128,1,float16,float16,0,0.5989919900894165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,0,1,float16,fp8,0,2.0667573610941568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,0,1,fp8,fp8,0,1.8271253903706868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,128,1,float16,fp8,0,0.6331573327382406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,128,1,fp8,fp8,0,0.5605866511662801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,0,1,float16,float16,0,1.9769493738810222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,128,1,float16,float16,0,0.3672800064086914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,0,1,fp8,fp8,0,1.8349706331888835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,0,1,float16,fp8,0,1.983023961385091
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,0,1,float16,float16,0,1.0873226324717205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,128,1,float16,fp8,0,0.37621867656707764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,128,1,fp8,fp8,0,0.35658133029937744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,128,1,float16,float16,0,0.3308639923731486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,0,1,float16,fp8,0,1.0964372952779133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,0,1,fp8,fp8,0,1.020037333170573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,128,1,fp8,fp8,0,0.31246399879455566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,128,1,float16,fp8,0,0.3335786660512288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,0,1,float16,float16,0,1.04529603322347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,128,1,float16,float16,0,0.33341864744822186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,0,1,float16,fp8,0,1.0480799674987793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,0,1,fp8,fp8,0,0.9756426811218262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,128,1,float16,fp8,0,0.3368053436279297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,128,1,fp8,fp8,0,0.31509333848953247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,0,1,float16,float16,0,1.0494186878204346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,128,1,float16,float16,0,0.3391253153483073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,0,1,float16,fp8,0,1.0518933137257893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,0,1,fp8,fp8,0,0.9788533051808676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,128,1,float16,fp8,0,0.34301332632700604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,0,1,float16,float16,0,1.0540693600972493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,128,1,fp8,fp8,0,0.3203893303871155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,128,1,float16,float16,0,0.26224533716837567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,0,1,float16,fp8,0,1.0607946713765461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,128,1,float16,fp8,0,0.2632586757342021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,0,1,float16,float16,0,0.6493546565373739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,0,1,fp8,fp8,0,0.9840746720631918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,128,1,fp8,fp8,0,0.2468000054359436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,128,1,float16,float16,0,0.25996800263722736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,0,1,fp8,fp8,0,0.6029066642125448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,0,1,float16,fp8,0,0.6494559844334921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,128,1,float16,fp8,0,0.2598293423652649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,128,1,fp8,fp8,0,0.24610666433970133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,0,1,float16,float16,0,0.6444106499354044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,0,1,float16,fp8,0,0.6448373397191366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,128,1,float16,float16,0,0.2603306571642558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,0,1,fp8,fp8,0,0.5997013250986735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,0,1,float16,float16,0,0.6436853408813477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,128,1,float16,fp8,0,0.26126933097839355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,128,1,fp8,fp8,0,0.24664533138275146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,0,1,float16,fp8,0,0.6441173156102499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,128,1,float16,float16,0,0.26100265979766846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,0,1,fp8,fp8,0,0.5998186667760214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,0,1,float16,float16,0,0.645034670829773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,128,1,float16,fp8,0,0.26078399022420246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,128,1,fp8,fp8,0,0.24685867627461752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,0,1,float16,fp8,0,0.6462933222452799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,0,1,fp8,fp8,0,0.6003679831822714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,128,1,float16,float16,0,1.6077440579732258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,128,1,fp8,fp8,0,1.4787200291951497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,128,1,float16,fp8,0,1.6213866869608562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,128,1,float16,float16,0,1.6179413795471191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,0,1,float16,float16,0,4.567941347757976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,0,1,float16,fp8,0,4.561861356099446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,0,1,fp8,fp8,0,4.205471992492676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,128,1,float16,fp8,0,1.6327786445617676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,128,1,fp8,fp8,0,1.4946880340576172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,0,1,float16,float16,0,4.69049612681071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,128,1,float16,float16,0,1.6439894040425618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,0,1,fp8,fp8,0,4.220629374186198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,0,1,float16,fp8,0,4.591439882914226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,128,1,float16,fp8,0,1.6611785888671875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,128,1,fp8,fp8,0,1.5273386637369792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,128,1,float16,float16,0,0.9408000310262045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,0,1,float16,float16,0,4.599125226338704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,128,1,float16,fp8,0,0.9603839715321859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,0,1,float16,fp8,0,4.614389419555664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,0,1,fp8,fp8,0,4.254165331522624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,128,1,fp8,fp8,0,0.8973066806793213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,0,1,float16,float16,0,2.488272031148275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,128,1,float16,float16,0,0.8294186592102051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,128,1,float16,fp8,0,0.8372960090637207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,0,1,fp8,fp8,0,2.2804746627807617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,0,1,float16,fp8,0,2.491754690806071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,0,1,float16,float16,0,2.3179732958475747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,128,1,fp8,fp8,0,0.7644586563110352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,128,1,float16,float16,0,0.8361279964447021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,0,1,fp8,fp8,0,2.1472800572713218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,128,1,float16,fp8,0,0.8450133005777994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,0,1,float16,fp8,0,2.3275893529256186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,0,1,float16,float16,0,2.328261375427246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,128,1,fp8,fp8,0,0.7727093696594238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,128,1,float16,float16,0,0.8475786844889323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,0,1,fp8,fp8,0,2.1543946266174316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,0,1,float16,fp8,0,2.3368852933247886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,128,1,float16,fp8,0,0.8570133050282797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,128,1,fp8,fp8,0,0.7871946493784586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,0,1,float16,float16,0,2.3437013626098633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,128,1,float16,float16,0,0.5002453327178955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,0,1,float16,fp8,0,2.4507999420166016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,0,1,fp8,fp8,0,2.168543974558512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,0,1,float16,float16,0,1.2728160222371419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,128,1,float16,fp8,0,0.5324053366978964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,128,1,fp8,fp8,0,0.4798293511072795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,128,1,float16,float16,0,0.4453386863072713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,0,1,float16,fp8,0,1.283882697423299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,0,1,fp8,fp8,0,1.1887839635213215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,128,1,float16,fp8,0,0.4495786825815837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,128,1,fp8,fp8,0,0.41513065497080487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,0,1,float16,float16,0,1.2095893224080403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,128,1,float16,float16,0,0.4501440127690633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,0,1,fp8,fp8,0,1.1245280106862385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,0,1,float16,fp8,0,1.213861306508382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,128,1,fp8,fp8,0,0.4196586608886719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,128,1,float16,fp8,0,0.4540746609369914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,0,1,float16,float16,0,1.213253339131673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,128,1,float16,float16,0,0.4540799856185913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,0,1,fp8,fp8,0,1.1273547013600667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,0,1,float16,fp8,0,1.2196160157521565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,128,1,float16,fp8,0,0.4608159859975179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,128,1,fp8,fp8,0,0.4267466862996419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,0,1,float16,float16,0,1.2220853169759114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,128,1,float16,float16,0,0.2811466654141744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,0,1,float16,fp8,0,1.228122631708781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,128,1,float16,fp8,0,0.28755732377370197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,0,1,fp8,fp8,0,1.1337707042694092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,0,1,float16,float16,0,0.6873546441396078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,128,1,fp8,fp8,0,0.2732479969660441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,128,1,float16,float16,0,0.2507999936739604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,0,1,float16,fp8,0,0.6933706601460775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,0,1,fp8,fp8,0,0.6457759936650594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,128,1,float16,fp8,0,0.25407999753952026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,0,1,float16,float16,0,0.653877337773641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,128,1,fp8,fp8,0,0.23886932929356894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,128,1,float16,float16,0,0.2532053391138713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,0,1,float16,fp8,0,0.6545759836832682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,0,1,fp8,fp8,0,0.6108266512552897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,128,1,float16,fp8,0,0.25619200865427655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,0,1,float16,float16,0,0.6567306518554688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,128,1,fp8,fp8,0,0.24084800481796265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,128,1,float16,float16,0,0.2590666611989339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,0,1,float16,fp8,0,0.6597653230031332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,0,1,fp8,fp8,0,0.614464004834493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,128,1,float16,fp8,0,0.2608746687571208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,0,1,float16,float16,0,0.6616479953130087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,128,1,fp8,fp8,0,0.2444266676902771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,128,1,float16,float16,0,0.20219733317693075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,0,1,float16,fp8,0,0.6645600001017252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,0,1,float16,float16,0,0.4254719813664754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,0,1,fp8,fp8,0,0.6174240112304688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,128,1,float16,fp8,0,0.20223466555277506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,128,1,fp8,fp8,0,0.19024533033370972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,128,1,float16,float16,0,0.19957866271336874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,0,1,float16,fp8,0,0.42642664909362793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,0,1,fp8,fp8,0,0.3937600056330363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,0,1,float16,float16,0,0.4209866523742676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,128,1,float16,fp8,0,0.19964265823364258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,128,1,fp8,fp8,0,0.1873226761817932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,0,1,float16,fp8,0,0.42025065422058105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,128,1,float16,float16,0,0.19953066110610962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,0,1,fp8,fp8,0,0.39002664883931476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,0,1,float16,float16,0,0.4198400179545085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,128,1,float16,fp8,0,0.19939200083414713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,128,1,fp8,fp8,0,0.18759999672571817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,128,1,float16,float16,0,0.19992534319559732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,0,1,float16,fp8,0,0.4225706656773885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,0,1,fp8,fp8,0,0.3898400068283081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,0,1,float16,float16,0,0.4206399917602539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,128,1,float16,fp8,0,0.1994719902674357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,128,1,fp8,fp8,0,0.18928533792495728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,0,1,float16,fp8,0,0.42003734906514484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,0,1,fp8,fp8,0,0.39161598682403564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,128,1,float16,float16,0,2.1370347340901694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,128,1,fp8,fp8,0,1.9642292658487956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,128,1,float16,fp8,0,2.149306615193685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,0,1,float16,float16,0,4.609312057495117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,128,1,float16,float16,0,2.155061403910319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,0,1,fp8,fp8,0,4.25156815846761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,0,1,float16,fp8,0,4.660287857055664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,128,1,float16,fp8,0,2.169066588083903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,128,1,fp8,fp8,0,1.983349323272705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,0,1,float16,float16,0,4.628378550211589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,128,1,float16,float16,0,2.1906186739603677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,0,1,fp8,fp8,0,4.270613352457683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,0,1,float16,fp8,0,4.641503969828288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,128,1,fp8,fp8,0,2.027023951212565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,128,1,float16,fp8,0,2.205712000528971
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,0,1,float16,float16,0,4.6747786204020185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,128,1,float16,float16,0,1.2198987007141113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,0,1,float16,float16,0,2.4954506556193032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,128,1,float16,fp8,0,1.2427199681599934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,0,1,float16,fp8,0,4.731413205464681
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,128,1,fp8,fp8,0,1.1608746846516926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,0,1,fp8,fp8,0,4.315386772155762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,0,1,fp8,fp8,0,2.31932799021403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,128,1,float16,float16,0,1.0717493693033855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,0,1,float16,fp8,0,2.528773307800293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,128,1,fp8,fp8,0,0.9830186367034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,128,1,float16,fp8,0,1.0809173583984375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,0,1,float16,float16,0,2.318544069925944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,0,1,float16,fp8,0,2.32912540435791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,128,1,float16,float16,0,1.0787306626637776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,0,1,fp8,fp8,0,2.1414453188578286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,128,1,float16,fp8,0,1.089402675628662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,128,1,fp8,fp8,0,0.9922026793162028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,0,1,float16,float16,0,2.33023993174235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,128,1,float16,float16,0,1.0948320229848225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,0,1,float16,fp8,0,2.338533401489258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,0,1,fp8,fp8,0,2.147808074951172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,128,1,float16,fp8,0,1.1053706804911296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,128,1,fp8,fp8,0,1.012768030166626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,0,1,float16,float16,0,2.3496267000834146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,128,1,float16,float16,0,0.6304800113042196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,128,1,float16,fp8,0,0.6453226804733276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,0,1,float16,float16,0,1.2798720200856526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,0,1,float16,fp8,0,2.358565330505371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,0,1,fp8,fp8,0,2.168464024861654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,128,1,fp8,fp8,0,0.6028906504313151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,128,1,float16,float16,0,0.5577119986216227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,0,1,fp8,fp8,0,1.193189303080241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,0,1,float16,fp8,0,1.2935840288798015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,128,1,float16,fp8,0,0.5631200075149536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,0,1,float16,float16,0,1.1967679659525554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,128,1,fp8,fp8,0,0.5176053444544474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,128,1,float16,float16,0,0.5628586610158285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,0,1,float16,fp8,0,1.2018773555755615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,0,1,fp8,fp8,0,1.1058986981709797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,0,1,float16,float16,0,1.2003626823425293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,128,1,fp8,fp8,0,0.5210826794306437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,128,1,float16,fp8,0,0.5683573484420776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,0,1,float16,fp8,0,1.2049280007680256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,128,1,float16,float16,0,0.5693546533584595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,0,1,fp8,fp8,0,1.111477295557658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,0,1,float16,float16,0,1.2104907035827637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,128,1,fp8,fp8,0,0.5293386777242025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,128,1,float16,fp8,0,0.5772320032119751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,128,1,float16,float16,0,0.3370933135350545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,0,1,float16,float16,0,0.6758293310801188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,128,1,float16,fp8,0,0.3450933297475179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,0,1,float16,fp8,0,1.216549317042033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,0,1,fp8,fp8,0,1.1199573675791423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,128,1,fp8,fp8,0,0.32449066638946533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,128,1,float16,float16,0,0.2983786662419637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,0,1,float16,fp8,0,0.6845653057098389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,0,1,fp8,fp8,0,0.6317280133565267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,0,1,float16,float16,0,0.6300373474756876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,128,1,float16,fp8,0,0.30076799790064496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,128,1,fp8,fp8,0,0.2818719943364461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,128,1,float16,float16,0,0.3017759919166565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,0,1,float16,fp8,0,0.6324160099029541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,0,1,fp8,fp8,0,0.5894826650619507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,0,1,float16,float16,0,0.6363573471705118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,128,1,fp8,fp8,0,0.2858240008354187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,128,1,float16,fp8,0,0.30582932631174725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,128,1,float16,float16,0,0.3059626619021098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,0,1,float16,fp8,0,0.6378933191299438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,0,1,fp8,fp8,0,0.5913920005162557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,0,1,float16,float16,0,0.6418186823527018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,128,1,float16,fp8,0,0.30932267506917316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,128,1,fp8,fp8,0,0.2896160085995992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,128,1,float16,float16,0,0.19341866175333658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,0,1,float16,float16,0,0.374783992767334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,0,1,float16,fp8,0,0.6452320019404093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,0,1,fp8,fp8,0,0.5959573189417521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,128,1,float16,fp8,0,0.19860267639160156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,128,1,fp8,fp8,0,0.1891146699587504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,128,1,float16,float16,0,0.1710559924443563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,0,1,float16,fp8,0,0.3798346519470215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,0,1,fp8,fp8,0,0.35353068510691327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,0,1,float16,float16,0,0.34969600041707355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,128,1,float16,fp8,0,0.17303466796875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,128,1,fp8,fp8,0,0.1605226695537567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,0,1,float16,fp8,0,0.3518666823705037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,128,1,float16,float16,0,0.17300266027450562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,0,1,fp8,fp8,0,0.32796265681584674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,0,1,float16,float16,0,0.35044801235198975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,128,1,float16,fp8,0,0.17415465911229452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,128,1,fp8,fp8,0,0.16485333442687988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,128,1,float16,float16,0,0.17498133579889932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,0,1,float16,fp8,0,0.35254931449890137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,0,1,fp8,fp8,0,0.33035733302434284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,0,1,float16,float16,0,0.35393067200978595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,128,1,float16,fp8,0,0.1771413286526998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,128,1,fp8,fp8,0,0.1697280009587606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,0,1,float16,fp8,0,0.35708268483479816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,128,1,float16,float16,0,0.139765332142512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,0,1,fp8,fp8,0,0.33296000957489014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,0,1,float16,float16,0,0.24281599124272665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,128,1,float16,fp8,0,0.14030399918556213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,128,1,fp8,fp8,0,0.1325813333193461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,0,1,float16,fp8,0,0.2424266735712687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,0,1,fp8,fp8,0,0.22662933667500815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,128,1,float16,float16,0,0.13748799761136374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,0,1,float16,float16,0,0.24026666084925333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,128,1,float16,fp8,0,0.13801067074139914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,128,1,fp8,fp8,0,0.13029332955678305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,0,1,float16,fp8,0,0.24095465739568075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,0,1,fp8,fp8,0,0.22502932945887247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,128,1,float16,float16,0,0.13793599605560303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,0,1,float16,float16,0,0.24079465866088867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,128,1,float16,fp8,0,0.13808000087738037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,128,1,fp8,fp8,0,0.13061333696047464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,0,1,float16,fp8,0,0.24109333753585815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,0,1,fp8,fp8,0,0.22443199157714844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,128,1,float16,float16,0,0.13803199927012125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,128,1,float16,fp8,0,0.1383840044339498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,0,1,float16,float16,0,0.24181866645812988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,128,1,fp8,fp8,0,0.12988799810409546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,0,1,float16,fp8,0,0.24102934201558432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,0,1,fp8,fp8,0,0.2239733338356018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,128,1,float16,float16,0,1.583898703257243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,128,1,float16,fp8,0,1.5960052808125813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,0,1,float16,float16,0,2.900773366292318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,128,1,fp8,fp8,0,1.4565866788228352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,0,1,float16,fp8,0,2.9136425654093423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,128,1,float16,float16,0,1.5965654055277507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,0,1,fp8,fp8,0,2.6725759506225586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,128,1,float16,fp8,0,1.6094932556152344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,128,1,fp8,fp8,0,1.4715572992960613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,0,1,float16,float16,0,2.9141438802083335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,128,1,float16,float16,0,1.62335999806722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,0,1,float16,fp8,0,2.9293813705444336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,0,1,fp8,fp8,0,2.688997268676758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,128,1,fp8,fp8,0,1.5030399958292644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,128,1,float16,fp8,0,1.6378666559855144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,0,1,float16,float16,0,2.9451732635498047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,128,1,float16,float16,0,0.9187306563059489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,0,1,float16,fp8,0,2.958592096964518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,0,1,float16,float16,0,1.597424030303955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,128,1,float16,fp8,0,0.9344267050425211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,0,1,fp8,fp8,0,2.721194585164388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,128,1,fp8,fp8,0,0.8711573282877604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,128,1,float16,float16,0,0.807973305384318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,0,1,float16,fp8,0,1.614751974741618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,0,1,fp8,fp8,0,1.4925492604573567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,0,1,float16,float16,0,1.475002606709798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,128,1,float16,fp8,0,0.8143146832784017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,128,1,fp8,fp8,0,0.7419146696726481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,0,1,float16,fp8,0,1.4843146006266277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,128,1,float16,float16,0,0.8123946984608968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,0,1,fp8,fp8,0,1.3595306078592937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,0,1,float16,float16,0,1.4813706080118816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,128,1,float16,fp8,0,0.8226293722788492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,128,1,fp8,fp8,0,0.7493120034535726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,128,1,float16,float16,0,0.8255946636199951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,0,1,float16,fp8,0,1.490554650624593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,0,1,fp8,fp8,0,1.3676586151123047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,0,1,float16,float16,0,1.497978687286377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,128,1,float16,fp8,0,0.8345173199971517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,128,1,fp8,fp8,0,0.7642772992451986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,128,1,float16,float16,0,0.47792001565297443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,0,1,float16,fp8,0,1.5057226816813152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,0,1,float16,float16,0,0.8278613090515137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,0,1,fp8,fp8,0,1.3816852569580078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,128,1,float16,fp8,0,0.4881173372268677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,128,1,fp8,fp8,0,0.45746131738026935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,128,1,float16,float16,0,0.42320001125335693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,0,1,float16,fp8,0,0.8378986517588297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,0,1,fp8,fp8,0,0.7738347053527832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,0,1,float16,float16,0,0.7655999660491943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,128,1,float16,fp8,0,0.42723198731740314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,128,1,fp8,fp8,0,0.3921866814295451
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,0,1,float16,fp8,0,0.7691840330759684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,128,1,float16,float16,0,0.4262080192565918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,0,1,fp8,fp8,0,0.7098080317179362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,0,1,float16,float16,0,0.7691199779510498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,128,1,float16,fp8,0,0.42974400520324707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,128,1,fp8,fp8,0,0.3961546818415324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,0,1,float16,fp8,0,0.77347199122111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,128,1,float16,float16,0,0.43218668301900226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,0,1,fp8,fp8,0,0.7134453455607096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,0,1,float16,float16,0,0.7777706782023112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,128,1,float16,fp8,0,0.4373706579208374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,128,1,fp8,fp8,0,0.4029386838277181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,128,1,float16,float16,0,0.25778132677078247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,0,1,float16,fp8,0,0.7820959885915121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,0,1,float16,float16,0,0.4429386854171753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,0,1,fp8,fp8,0,0.720789353052775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,128,1,float16,fp8,0,0.26501866181691486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,128,1,fp8,fp8,0,0.24875199794769287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,128,1,float16,float16,0,0.2264159917831421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,0,1,float16,fp8,0,0.4503093163172404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,0,1,fp8,fp8,0,0.41732800006866455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,0,1,float16,float16,0,0.4061280091603597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,128,1,float16,fp8,0,0.22871466477711996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,128,1,fp8,fp8,0,0.2157599925994873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,0,1,float16,fp8,0,0.4102506637573242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,0,1,fp8,fp8,0,0.3819520076115926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,128,1,float16,float16,0,0.22973867257436117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,0,1,float16,float16,0,0.4103573163350423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,128,1,float16,fp8,0,0.23036267360051474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,128,1,fp8,fp8,0,0.21795733769734701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,0,1,float16,fp8,0,0.41124268372853595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,128,1,float16,float16,0,0.23392534255981445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,0,1,fp8,fp8,0,0.3854453166325887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,0,1,float16,float16,0,0.4159146547317505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,128,1,float16,fp8,0,0.2364906668663025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,128,1,fp8,fp8,0,0.2223680019378662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,128,1,float16,float16,0,0.150629331668218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,0,1,float16,fp8,0,0.41921067237854004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,0,1,fp8,fp8,0,0.389792005221049
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,128,1,float16,fp8,0,0.15261333187421164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,0,1,float16,float16,0,0.2513599991798401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,128,1,fp8,fp8,0,0.14672000209490457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,0,1,float16,fp8,0,0.25547200441360474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,0,1,fp8,fp8,0,0.2387253244717916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,128,1,float16,float16,0,0.13276267051696777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,128,1,float16,fp8,0,0.132832000652949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,128,1,fp8,fp8,0,0.12181333700815837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,0,1,float16,float16,0,0.23316800594329834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,128,1,float16,float16,0,0.13215999801953635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,0,1,float16,fp8,0,0.23255999883015951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,0,1,fp8,fp8,0,0.21389333407084146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,128,1,float16,fp8,0,0.13395200173060098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,128,1,fp8,fp8,0,0.12353600064913432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,0,1,float16,float16,0,0.2329919934272766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,0,1,fp8,fp8,0,0.21582933266957602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,128,1,float16,float16,0,0.13403733571370444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,0,1,float16,fp8,0,0.2339573303858439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,128,1,float16,fp8,0,0.13606933752695718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,128,1,fp8,fp8,0,0.12866133451461792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,0,1,float16,float16,0,0.23429334163665771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,128,1,float16,float16,0,0.10921066999435425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,0,1,fp8,fp8,0,0.22023999691009521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,0,1,float16,fp8,0,0.2360373338063558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,128,1,float16,fp8,0,0.10922132929166158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,128,1,fp8,fp8,0,0.10394133130709331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,0,1,float16,float16,0,0.16717867056528726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,128,1,float16,float16,0,0.10753599802652995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,0,1,float16,fp8,0,0.16698666413625082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,0,1,fp8,fp8,0,0.15656532843907675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,128,1,float16,fp8,0,0.10745599865913391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,128,1,fp8,fp8,0,0.10129066308339436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,0,1,float16,float16,0,0.16685332854588827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,0,1,float16,fp8,0,0.1660053332646688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,128,1,float16,float16,0,0.10748799641927083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,0,1,fp8,fp8,0,0.1553386648495992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,128,1,float16,fp8,0,0.1086079974969228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,128,1,fp8,fp8,0,0.1014453371365865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,0,1,float16,float16,0,0.16657599806785583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,0,1,float16,fp8,0,0.1682986617088318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,0,1,fp8,fp8,0,0.1566986640294393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,128,1,float16,float16,0,0.10758399963378906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,128,1,float16,fp8,0,0.10733333230018616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,128,1,fp8,fp8,0,0.10316800077756245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,0,1,float16,float16,0,0.1669600009918213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,0,1,fp8,fp8,0,0.154831995566686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,0,1,float16,fp8,0,0.16735466321309408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,128,1,float16,float16,0,2.1246132850646973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,128,1,float16,fp8,0,2.1335573196411133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,0,1,float16,float16,0,3.1541385650634766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,128,1,fp8,fp8,0,1.9299999872843425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,0,1,float16,fp8,0,3.1658881505330405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,0,1,fp8,fp8,0,2.8799947102864585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,128,1,float16,float16,0,2.156869411468506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,0,1,float16,float16,0,3.1855198542277017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,128,1,fp8,fp8,0,1.949168046315511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,128,1,float16,fp8,0,2.158751964569092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,0,1,float16,fp8,0,3.19490655263265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,0,1,fp8,fp8,0,2.9020373026529946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,128,1,float16,float16,0,2.1852639516194663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,0,1,float16,float16,0,3.2181065877278647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,128,1,float16,fp8,0,2.197824001312256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,128,1,fp8,fp8,0,1.9973386128743489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,128,1,float16,float16,0,1.2082773049672444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,0,1,float16,fp8,0,3.23526922861735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,0,1,float16,float16,0,1.745349407196045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,0,1,fp8,fp8,0,2.948458671569824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,128,1,float16,fp8,0,1.226138671239217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,128,1,fp8,fp8,0,1.1450773080190022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,128,1,float16,float16,0,1.0552213191986084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,0,1,float16,fp8,0,1.762730598449707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,0,1,fp8,fp8,0,1.631322701772054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,0,1,float16,float16,0,1.5769492785135906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,128,1,float16,fp8,0,1.064517339070638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,128,1,fp8,fp8,0,0.9693493048350016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,0,1,float16,fp8,0,1.5866133371988933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,128,1,float16,float16,0,1.064079999923706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,0,1,fp8,fp8,0,1.4492799441019695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,0,1,float16,float16,0,1.589471975962321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,128,1,float16,fp8,0,1.0733386675516765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,128,1,fp8,fp8,0,0.9777973492940267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,0,1,float16,fp8,0,1.597274621327718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,128,1,float16,float16,0,1.0824693044026692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,0,1,fp8,fp8,0,1.4598080317179363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,0,1,float16,float16,0,1.610213279724121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,128,1,float16,fp8,0,1.0934453010559082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,128,1,fp8,fp8,0,0.9981066385904948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,128,1,float16,float16,0,0.6171679894129435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,0,1,float16,fp8,0,1.6189813613891602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,0,1,float16,float16,0,0.8913546403249105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,0,1,fp8,fp8,0,1.4805599848429363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,128,1,float16,fp8,0,0.6289600133895874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,128,1,fp8,fp8,0,0.586677352587382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,128,1,float16,float16,0,0.5433013439178467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,0,1,float16,fp8,0,0.9021546840667725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,0,1,fp8,fp8,0,0.8341226577758789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,0,1,float16,float16,0,0.8108426729838053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,128,1,fp8,fp8,0,0.5008000135421753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,128,1,float16,fp8,0,0.5485866864522299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,0,1,float16,fp8,0,0.8150880336761475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,128,1,float16,float16,0,0.548255999883016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,0,1,fp8,fp8,0,0.7469546794891357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,0,1,float16,float16,0,0.8150239785512289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,128,1,float16,fp8,0,0.5536213318506876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,128,1,fp8,fp8,0,0.5049493312835693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,128,1,float16,float16,0,0.5567200183868408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,0,1,fp8,fp8,0,0.7512426376342773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,0,1,float16,fp8,0,0.8200319608052572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,128,1,fp8,fp8,0,0.5151573419570923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,128,1,float16,fp8,0,0.5634400049845377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,0,1,float16,float16,0,0.8261386553446451
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,128,1,float16,float16,0,0.32340800762176514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,0,1,fp8,fp8,0,0.7608853181203207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,0,1,float16,fp8,0,0.8335146903991699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,0,1,float16,float16,0,0.46754133701324463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,128,1,float16,fp8,0,0.3312159975369771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,128,1,fp8,fp8,0,0.31227733691533405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,0,1,float16,fp8,0,0.47491200764973956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,128,1,float16,float16,0,0.2826133370399475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,0,1,fp8,fp8,0,0.44097065925598145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,128,1,float16,fp8,0,0.2860106627146403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,128,1,fp8,fp8,0,0.26734399795532227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,0,1,float16,float16,0,0.4233226776123047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,128,1,float16,float16,0,0.2876586715380351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,0,1,float16,fp8,0,0.42665600776672363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,0,1,fp8,fp8,0,0.39555199940999347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,128,1,float16,fp8,0,0.2901013294855754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,128,1,fp8,fp8,0,0.27053866783777875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,0,1,float16,float16,0,0.42711468537648517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,0,1,float16,fp8,0,0.43030401070912677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,128,1,float16,float16,0,0.2930399974187215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,0,1,fp8,fp8,0,0.39870933691660565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,0,1,float16,float16,0,0.4320480028788249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,128,1,fp8,fp8,0,0.27658132712046307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,128,1,float16,fp8,0,0.298250675201416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,0,1,float16,fp8,0,0.43806934356689453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,0,1,fp8,fp8,0,0.4028746684392293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,128,1,float16,float16,0,0.1793066660563151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,0,1,float16,float16,0,0.2578666607538859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,128,1,float16,fp8,0,0.18336000045140585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,128,1,fp8,fp8,0,0.17416000366210938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,0,1,float16,fp8,0,0.2603573401769002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,128,1,float16,float16,0,0.15312000115712485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,0,1,fp8,fp8,0,0.24270933866500854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,0,1,float16,float16,0,0.2283466657002767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,128,1,float16,fp8,0,0.15576533476511636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,128,1,fp8,fp8,0,0.14737066626548767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,0,1,float16,fp8,0,0.23046932617823282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,128,1,float16,float16,0,0.1545973320802053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,0,1,fp8,fp8,0,0.2146186629931132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,0,1,float16,float16,0,0.23062400023142496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,128,1,float16,fp8,0,0.1583466629187266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,128,1,fp8,fp8,0,0.15038933356602988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,0,1,float16,fp8,0,0.23029865821202597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,128,1,float16,float16,0,0.15851199626922607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,0,1,fp8,fp8,0,0.2180639902750651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,0,1,float16,float16,0,0.23476266860961914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,128,1,float16,fp8,0,0.160480002562205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,128,1,fp8,fp8,0,0.1551199952761332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,0,1,float16,fp8,0,0.2367039918899536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,128,1,float16,float16,0,0.10341333349545796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,0,1,fp8,fp8,0,0.22395733992258707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,0,1,float16,float16,0,0.1474186678727468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,128,1,float16,fp8,0,0.10604799787203471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,128,1,fp8,fp8,0,0.10502400000890096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,0,1,float16,fp8,0,0.14903466900189719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,0,1,fp8,fp8,0,0.14268799622853598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,128,1,float16,float16,0,0.09458667039871216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,0,1,float16,float16,0,0.1381119986375173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,128,1,float16,fp8,0,0.09489599863688152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,128,1,fp8,fp8,0,0.08725866675376892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,0,1,float16,fp8,0,0.13775466879208884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,0,1,fp8,fp8,0,0.12779200077056885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,128,1,float16,float16,0,0.09455466270446777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,0,1,float16,float16,0,0.1365386644999186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,128,1,float16,fp8,0,0.095360000928243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,128,1,fp8,fp8,0,0.08893332878748576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,0,1,float16,fp8,0,0.13780267039934793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,0,1,fp8,fp8,0,0.12781866391499838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,128,1,float16,float16,0,0.09530666470527649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,0,1,float16,float16,0,0.13728533188501993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,128,1,float16,fp8,0,0.09713600079218547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,128,1,fp8,fp8,0,0.09089066584904988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,0,1,float16,fp8,0,0.13802133003870645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,128,1,float16,float16,0,0.0786293347676595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,0,1,fp8,fp8,0,0.1281546652317047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,0,1,float16,float16,0,0.10377599795659383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,128,1,float16,fp8,0,0.0784853349129359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,128,1,fp8,fp8,0,0.07448000212510426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,0,1,float16,fp8,0,0.10316800077756245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,0,1,fp8,fp8,0,0.09894933303197224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,128,1,float16,float16,0,0.0784800002972285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,0,1,float16,float16,0,0.10335999727249146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,128,1,float16,fp8,0,0.07660266757011414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,128,1,fp8,fp8,0,0.07457066575686137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,0,1,float16,fp8,0,0.10408000151316325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,128,1,float16,float16,0,0.07851733267307281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,0,1,fp8,fp8,0,0.09709333380063374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,0,1,float16,float16,0,0.10339200496673584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,128,1,float16,fp8,0,0.07869333525498708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,128,1,fp8,fp8,0,0.07373866438865662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,0,1,float16,fp8,0,0.10313600301742554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,0,1,fp8,fp8,0,0.09698133667310078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,128,1,float16,float16,0,0.0775733341773351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,128,1,float16,fp8,0,0.0779306689898173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,0,1,float16,float16,0,0.1039573351542155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,128,1,fp8,fp8,0,0.07337599992752075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,0,1,float16,fp8,0,0.10337600111961365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,0,1,fp8,fp8,0,0.09914132952690125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,128,1,float16,float16,0,1.5771679878234863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,128,1,float16,fp8,0,1.5875892639160156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,0,1,float16,float16,0,2.0856053034464517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,128,1,fp8,fp8,0,1.4369707107543945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,0,1,float16,fp8,0,2.0973547299702964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,0,1,fp8,fp8,0,1.9064000447591145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,128,1,float16,float16,0,1.5929652849833171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,128,1,float16,fp8,0,1.6054666837056477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,0,1,float16,float16,0,2.106917381286621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,128,1,fp8,fp8,0,1.4544213612874348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,128,1,float16,float16,0,1.6180373827616374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,0,1,fp8,fp8,0,1.925813357035319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,0,1,float16,fp8,0,2.1149867375691733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,0,1,float16,float16,0,2.132885297139486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,128,1,fp8,fp8,0,1.481760025024414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,128,1,float16,fp8,0,1.6289706230163574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,0,1,float16,fp8,0,2.1412906646728516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,128,1,float16,float16,0,0.9084906578063965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,0,1,fp8,fp8,0,1.952005386352539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,0,1,float16,float16,0,1.1788746515909831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,128,1,float16,fp8,0,0.9229280153910319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,128,1,fp8,fp8,0,0.8611520131429037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,0,1,float16,fp8,0,1.1935679912567139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,0,1,fp8,fp8,0,1.103386640548706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,128,1,float16,float16,0,0.7974613507588705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,0,1,float16,float16,0,1.0541813373565674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,128,1,float16,fp8,0,0.8027306397755941
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,128,1,fp8,fp8,0,0.7295520305633545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,0,1,float16,fp8,0,1.0612800121307373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,0,1,fp8,fp8,0,0.9672533671061198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,128,1,float16,float16,0,0.8033226331075033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,0,1,float16,float16,0,1.0631306966145833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,128,1,float16,fp8,0,0.8099520206451416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,128,1,fp8,fp8,0,0.7372693220774332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,0,1,float16,fp8,0,1.069381316502889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,0,1,fp8,fp8,0,0.9753173192342123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,128,1,float16,float16,0,0.8136640389760336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,0,1,float16,float16,0,1.0733280181884766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,128,1,float16,fp8,0,0.8210079669952393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,128,1,fp8,fp8,0,0.7497173150380453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,128,1,float16,float16,0,0.46798400084177655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,0,1,float16,fp8,0,1.082192023595174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,0,1,float16,float16,0,0.6068533261617025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,0,1,fp8,fp8,0,0.9880586465199789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,128,1,float16,fp8,0,0.4779520034790039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,128,1,fp8,fp8,0,0.44465065002441406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,0,1,float16,fp8,0,0.6174346605936686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,0,1,fp8,fp8,0,0.5683093468348185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,128,1,float16,float16,0,0.4102773269017537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,0,1,float16,float16,0,0.5418346722920736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,128,1,float16,fp8,0,0.4150613149007161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,128,1,fp8,fp8,0,0.3817760149637858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,0,1,float16,fp8,0,0.546725352605184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,128,1,float16,float16,0,0.41468266646067303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,0,1,fp8,fp8,0,0.503050684928894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,0,1,float16,float16,0,0.5502453247706095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,128,1,float16,fp8,0,0.41947734355926514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,128,1,fp8,fp8,0,0.38390398025512695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,0,1,float16,fp8,0,0.5524533192316691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,0,1,fp8,fp8,0,0.507861336072286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,128,1,float16,float16,0,0.42022931575775146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,0,1,float16,float16,0,0.5570613145828247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,128,1,float16,fp8,0,0.4257440169652303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,128,1,fp8,fp8,0,0.3906346559524536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,0,1,float16,fp8,0,0.5586666663487753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,128,1,float16,float16,0,0.24841066201527914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,0,1,fp8,fp8,0,0.5142613252003988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,0,1,float16,float16,0,0.3218239943186442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,128,1,fp8,fp8,0,0.23837866385777792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,128,1,float16,fp8,0,0.2545599937438965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,0,1,float16,fp8,0,0.3274986743927002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,0,1,fp8,fp8,0,0.30322132507960003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,128,1,float16,float16,0,0.21337600549062094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,0,1,float16,float16,0,0.28434133529663086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,128,1,fp8,fp8,0,0.20367467403411865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,128,1,float16,fp8,0,0.21726399660110474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,0,1,float16,fp8,0,0.2852800091107686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,0,1,fp8,fp8,0,0.26787734031677246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,128,1,float16,float16,0,0.21619200706481934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,0,1,float16,float16,0,0.2868640025456746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,128,1,float16,fp8,0,0.21935999393463135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,128,1,fp8,fp8,0,0.20637333393096924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,0,1,float16,fp8,0,0.29103465874989826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,0,1,fp8,fp8,0,0.2715146740277608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,128,1,float16,float16,0,0.22206934293111166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,0,1,float16,float16,0,0.29176000754038495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,128,1,float16,fp8,0,0.22534932692845663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,128,1,fp8,fp8,0,0.2104746699333191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,0,1,float16,fp8,0,0.29421333471934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,0,1,fp8,fp8,0,0.27541865905125934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,128,1,float16,float16,0,0.13818132877349854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,0,1,float16,float16,0,0.17717333634694418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,128,1,float16,fp8,0,0.14205867052078247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,128,1,fp8,fp8,0,0.13390933473904928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,0,1,float16,fp8,0,0.18112534284591675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,0,1,fp8,fp8,0,0.1711626648902893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,128,1,float16,float16,0,0.11974933743476868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,0,1,float16,float16,0,0.15727999806404114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,128,1,float16,fp8,0,0.11990400155385335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,128,1,fp8,fp8,0,0.11125866572062175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,0,1,float16,fp8,0,0.1588266690572103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,128,1,float16,float16,0,0.11973333358764648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,0,1,fp8,fp8,0,0.14441066980361938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,0,1,float16,float16,0,0.15796266992886862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,128,1,float16,fp8,0,0.12174399693806966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,128,1,fp8,fp8,0,0.11201600233713786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,0,1,float16,fp8,0,0.159061332543691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,128,1,float16,float16,0,0.12194666266441345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,0,1,fp8,fp8,0,0.14641599853833517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,0,1,float16,float16,0,0.1606933375199636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,128,1,float16,fp8,0,0.12379733721415202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,128,1,fp8,fp8,0,0.11562666296958923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,0,1,float16,fp8,0,0.1604266663392385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,128,1,float16,float16,0,0.08065066734949748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,0,1,fp8,fp8,0,0.15271466970443726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,0,1,float16,float16,0,0.10363733768463135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,128,1,float16,fp8,0,0.08303999900817871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,128,1,fp8,fp8,0,0.08043733239173889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,0,1,fp8,fp8,0,0.1030399998029073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,0,1,float16,fp8,0,0.10538666447003682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,128,1,float16,float16,0,0.07456533114115398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,128,1,float16,fp8,0,0.07482133309046428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,0,1,float16,float16,0,0.09884267052014668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,128,1,fp8,fp8,0,0.07073600093523662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,0,1,float16,fp8,0,0.09915733337402344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,0,1,fp8,fp8,0,0.09118933478991191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,128,1,float16,float16,0,0.0763733337322871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,128,1,float16,fp8,0,0.07656533519426982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,0,1,float16,float16,0,0.09898666540781657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,128,1,fp8,fp8,0,0.07065066695213318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,128,1,float16,float16,0,0.07492800056934357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,0,1,float16,fp8,0,0.09874666730562846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,0,1,fp8,fp8,0,0.0909546713034312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,128,1,fp8,fp8,0,0.07111999889214833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,128,1,float16,fp8,0,0.07681599756081899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,0,1,float16,float16,0,0.09883733590443929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,128,1,float16,float16,0,0.06229866544405619
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,0,1,float16,fp8,0,0.10076266527175903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,0,1,fp8,fp8,0,0.09287466605504353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,0,1,float16,float16,0,0.0784853349129359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,128,1,float16,fp8,0,0.0621013343334198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,128,1,fp8,fp8,0,0.05869866907596588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,0,1,float16,fp8,0,0.07879999776681264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,0,1,fp8,fp8,0,0.07268266876538594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,128,1,float16,float16,0,0.0624533345301946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,128,1,float16,fp8,0,0.0603413333495458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,128,1,fp8,fp8,0,0.0583840012550354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,0,1,float16,float16,0,0.07870933413505554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,0,1,float16,fp8,0,0.07858666777610779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,128,1,float16,float16,0,0.060138667623202004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,0,1,fp8,fp8,0,0.07442666590213776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,128,1,float16,fp8,0,0.060405333836873375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,0,1,float16,float16,0,0.07845866680145264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,128,1,fp8,fp8,0,0.0582826683918635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,0,1,float16,fp8,0,0.07798933486143748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,0,1,fp8,fp8,0,0.07442666590213776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,128,1,float16,float16,0,0.060234665870666504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,128,1,float16,fp8,0,0.06197333335876465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,128,1,fp8,fp8,0,0.058133333921432495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,0,1,float16,float16,0,0.07773333291212718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,0,1,float16,fp8,0,0.07885866860548656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,0,1,fp8,fp8,0,0.07255466779073079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,128,1,float16,float16,0,1.8572160402933757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,128,1,float16,fp8,0,1.857477347056071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,0,1,float16,float16,0,2.189472039540609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,128,1,fp8,fp8,0,1.7688105901082356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,0,1,float16,fp8,0,2.184901396433512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,0,1,fp8,fp8,0,2.0534666379292807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,128,1,float16,float16,0,1.8650186856587727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,0,1,float16,float16,0,2.2000373204549155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,128,1,float16,fp8,0,1.8600373268127441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,128,1,fp8,fp8,0,1.7753760019938152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,0,1,float16,fp8,0,2.1915413538614907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,0,1,fp8,fp8,0,2.0942293802897134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,128,1,float16,float16,0,1.9315733909606934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,0,1,float16,float16,0,2.276458740234375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,128,1,float16,fp8,0,1.912943998972575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,128,1,fp8,fp8,0,1.8608160018920898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,128,1,float16,float16,0,1.025487979253133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,0,1,float16,fp8,0,2.2244745890299478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,0,1,float16,float16,0,1.205135981241862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,0,1,fp8,fp8,0,2.1640480359395347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,128,1,float16,fp8,0,1.0045386950174968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,128,1,fp8,fp8,0,1.0063680013020833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,0,1,float16,fp8,0,1.1838826338450115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,0,1,fp8,fp8,0,1.1581066449483235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,128,1,float16,float16,0,0.9401760101318359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,0,1,float16,float16,0,1.1065013408660889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,128,1,float16,fp8,0,0.9381972948710123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,128,1,fp8,fp8,0,0.8870133558909098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,0,1,float16,fp8,0,1.1050293445587158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,0,1,fp8,fp8,0,1.0381759802500408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,128,1,float16,float16,0,0.9443946679433187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,0,1,float16,float16,0,1.1115413506825764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,128,1,fp8,fp8,0,0.8948533535003662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,128,1,float16,fp8,0,0.942741314570109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,0,1,float16,fp8,0,1.1119999885559082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,0,1,fp8,fp8,0,1.0466506481170654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,128,1,float16,float16,0,0.9518506526947021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,0,1,float16,float16,0,1.1203946272532146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,128,1,fp8,fp8,0,0.9142879645029703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,128,1,float16,fp8,0,0.9462080001831055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,128,1,float16,float16,0,0.5259093443552653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,0,1,float16,fp8,0,1.1166720390319824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,0,1,fp8,fp8,0,1.0603360335032146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,0,1,float16,float16,0,0.6188693443934122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,128,1,float16,fp8,0,0.5158666769663492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,128,1,fp8,fp8,0,0.5085813204447428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,0,1,float16,fp8,0,0.6075573364893595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,0,1,fp8,fp8,0,0.5872053305308024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,128,1,float16,float16,0,0.4800479809443156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,128,1,float16,fp8,0,0.47992531458536786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,128,1,fp8,fp8,0,0.4522240161895752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,0,1,float16,float16,0,0.5652586619059244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,0,1,float16,fp8,0,0.565445343653361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,128,1,float16,float16,0,0.48264535268147785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,0,1,fp8,fp8,0,0.5307466586430868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,0,1,float16,float16,0,0.5692853530248007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,128,1,float16,fp8,0,0.483514666557312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,128,1,fp8,fp8,0,0.4564106861750285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,0,1,float16,fp8,0,0.5690666834513346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,0,1,fp8,fp8,0,0.5352960030237833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,128,1,float16,float16,0,0.48692798614501953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,0,1,float16,float16,0,0.5728266636530558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,128,1,float16,fp8,0,0.4846880038579305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,128,1,fp8,fp8,0,0.46093865235646564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,128,1,float16,float16,0,0.2730826735496521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,0,1,float16,fp8,0,0.571733315785726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,0,1,fp8,fp8,0,0.5406186580657959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,0,1,float16,float16,0,0.32019199927647907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,128,1,float16,fp8,0,0.26955199241638184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,128,1,fp8,fp8,0,0.2642880082130432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,0,1,float16,fp8,0,0.3167733351389567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,0,1,fp8,fp8,0,0.3070826729138692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,128,1,float16,float16,0,0.24895467360814413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,0,1,float16,float16,0,0.2937333385149638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,128,1,float16,fp8,0,0.2501973311106364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,128,1,fp8,fp8,0,0.23748266696929932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,0,1,float16,fp8,0,0.29360000292460126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,0,1,fp8,fp8,0,0.2778293291727702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,128,1,float16,float16,0,0.25110934178034466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,0,1,float16,float16,0,0.29647467533747357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,128,1,float16,fp8,0,0.2510026693344116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,128,1,fp8,fp8,0,0.24266666173934937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,0,1,float16,fp8,0,0.2964906692504883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,128,1,float16,float16,0,0.2526826659838359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,0,1,fp8,fp8,0,0.2837226589520772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,0,1,float16,float16,0,0.3006346623102824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,128,1,float16,fp8,0,0.2550666729609172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,128,1,fp8,fp8,0,0.24278932809829712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,0,1,float16,fp8,0,0.2980159918467204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,0,1,fp8,fp8,0,0.28409600257873535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,128,1,float16,float16,0,0.14963733156522116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,0,1,float16,float16,0,0.17573332786560059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,128,1,float16,fp8,0,0.14684266845385233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,128,1,fp8,fp8,0,0.1467519998550415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,0,1,float16,fp8,0,0.17215466499328613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,0,1,fp8,fp8,0,0.16980266571044922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,128,1,float16,float16,0,0.13342400391896567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,0,1,float16,float16,0,0.15858667095502219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,128,1,float16,fp8,0,0.13191466530164084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,128,1,fp8,fp8,0,0.1279093325138092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,0,1,float16,fp8,0,0.15751999616622925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,0,1,fp8,fp8,0,0.15027200182278952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,128,1,float16,float16,0,0.13378666838010153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,0,1,float16,float16,0,0.15819733341534933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,128,1,fp8,fp8,0,0.1302880048751831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,128,1,float16,fp8,0,0.13450133800506592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,0,1,float16,fp8,0,0.1578933298587799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,0,1,fp8,fp8,0,0.15095999836921692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,128,1,float16,float16,0,0.13640532890955606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,0,1,float16,float16,0,0.16034133235613504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,128,1,float16,fp8,0,0.13617600003878275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,128,1,fp8,fp8,0,0.13153599699338278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,128,1,float16,float16,0,0.0827466646830241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,0,1,float16,fp8,0,0.16146666804949442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,0,1,fp8,fp8,0,0.1532639960447947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,0,1,float16,float16,0,0.09587732950846355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,128,1,float16,fp8,0,0.0814933329820633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,128,1,fp8,fp8,0,0.08343467116355896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,0,1,float16,fp8,0,0.09460799892743428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,0,1,fp8,fp8,0,0.0974026620388031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,128,1,float16,float16,0,0.07637866834799449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,0,1,float16,float16,0,0.08947733044624329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,128,1,float16,fp8,0,0.0763733337322871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,128,1,fp8,fp8,0,0.07161066432793935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,0,1,float16,fp8,0,0.08988266189893086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,0,1,fp8,fp8,0,0.08398933211962382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,128,1,float16,float16,0,0.07643199960390727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,0,1,float16,float16,0,0.08945600191752116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,128,1,float16,fp8,0,0.07523733377456665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,128,1,fp8,fp8,0,0.07053333520889282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,0,1,float16,fp8,0,0.08962133526802063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,0,1,fp8,fp8,0,0.08319999774297078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,128,1,float16,float16,0,0.07653866708278656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,0,1,float16,float16,0,0.09114666779836018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,128,1,float16,fp8,0,0.07622399926185608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,128,1,fp8,fp8,0,0.070783997575442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,0,1,float16,fp8,0,0.09126933415730794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,128,1,float16,float16,0,0.04804266492525736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,0,1,fp8,fp8,0,0.08480000495910645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,0,1,float16,float16,0,0.056330665946006775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,128,1,float16,fp8,0,0.04804799954096476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,128,1,fp8,fp8,0,0.04844800134499868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,0,1,float16,fp8,0,0.05656533439954122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,0,1,fp8,fp8,0,0.053818667928377785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,128,1,float16,float16,0,0.04764799773693085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,0,1,float16,float16,0,0.054010664423306785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,128,1,float16,fp8,0,0.04804266492525736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,128,1,fp8,fp8,0,0.04435733457406362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,0,1,float16,fp8,0,0.054048001766204834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,0,1,fp8,fp8,0,0.05217599868774414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,128,1,float16,float16,0,0.047968000173568726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,0,1,float16,float16,0,0.05400000015894572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,128,1,float16,fp8,0,0.04799999793370565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,128,1,fp8,fp8,0,0.045663997530937195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,0,1,float16,fp8,0,0.05442133545875549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,0,1,fp8,fp8,0,0.05201066533724467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,128,1,float16,float16,0,0.04799999793370565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,0,1,float16,float16,0,0.05485333502292633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,128,1,float16,fp8,0,0.04789866507053375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,128,1,fp8,fp8,0,0.04587733248869578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,0,1,float16,fp8,0,0.055013333757718406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,0,1,fp8,fp8,0,0.05211733281612396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,128,1,float16,float16,0,0.03363200028737386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,0,1,float16,float16,0,0.041877334316571556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,128,1,float16,fp8,0,0.035445332527160645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,128,1,fp8,fp8,0,0.033333333830038704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,0,1,float16,fp8,0,0.04144533226887385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,0,1,fp8,fp8,0,0.039461334546407066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,128,1,float16,float16,0,0.03397866586844126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,0,1,float16,float16,0,0.03946666667858759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,128,1,float16,fp8,0,0.03339199970165888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,128,1,fp8,fp8,0,0.031397332747777305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,0,1,float16,fp8,0,0.03958933303753535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,0,1,fp8,fp8,0,0.03729599962631861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,128,1,float16,float16,0,0.035242666800816856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,0,1,float16,float16,0,0.03991466760635376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,128,1,float16,fp8,0,0.03338133295377096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,128,1,fp8,fp8,0,0.03357866654793421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,0,1,float16,fp8,0,0.0397119993964831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,0,1,fp8,fp8,0,0.03743999948104223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,128,1,float16,float16,0,0.03550933301448822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,0,1,float16,float16,0,0.03972266614437103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,128,1,float16,fp8,0,0.03510933369398117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,128,1,fp8,fp8,0,0.03368533402681351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,0,1,float16,fp8,0,0.03956799954175949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,0,1,fp8,fp8,0,0.03775999943415324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,128,1,float16,float16,0,1.803269386291504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,0,1,float16,float16,0,1.825834592183431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,128,1,float16,fp8,0,1.800997257232666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,128,1,fp8,fp8,0,1.7274133364359539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,0,1,float16,fp8,0,1.823344071706136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,0,1,fp8,fp8,0,1.7423413594563801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,128,1,float16,float16,0,1.8125972747802734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,0,1,float16,float16,0,1.836890697479248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,128,1,fp8,fp8,0,1.7555360794067383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,128,1,float16,fp8,0,1.8053760528564453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,0,1,float16,fp8,0,1.8309973080952961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,0,1,fp8,fp8,0,1.7627147038777669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,128,1,float16,float16,0,1.8928426106770833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,0,1,float16,float16,0,1.919040044148763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,128,1,float16,fp8,0,1.8783253033955891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,128,1,fp8,fp8,0,1.816186745961507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,128,1,float16,float16,0,0.9984959761301676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,0,1,float16,fp8,0,1.8930080731709797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,0,1,fp8,fp8,0,1.8295787175496419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,0,1,float16,float16,0,1.0174986521402996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,128,1,float16,fp8,0,0.97652800877889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,128,1,fp8,fp8,0,0.9778186480204264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,0,1,float16,fp8,0,0.9983627001444498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,0,1,fp8,fp8,0,1.0003039836883545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,128,1,float16,float16,0,0.909781297047933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,0,1,float16,float16,0,0.9220960140228271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,128,1,float16,fp8,0,0.9086559613545736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,128,1,fp8,fp8,0,0.8591307004292806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,0,1,float16,fp8,0,0.9224159717559814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,0,1,fp8,fp8,0,0.8670186996459961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,128,1,float16,float16,0,0.9155733585357666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,0,1,float16,float16,0,0.9285066922505697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,128,1,fp8,fp8,0,0.8679626782735189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,128,1,float16,fp8,0,0.9142080148061117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,0,1,float16,fp8,0,0.9253173669179281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,0,1,fp8,fp8,0,0.8781226476033529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,128,1,float16,float16,0,0.9223946730295817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,0,1,float16,float16,0,0.9362346331278483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,128,1,float16,fp8,0,0.9197333653767904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,128,1,float16,float16,0,0.5132266680399576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,128,1,fp8,fp8,0,0.895418643951416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,0,1,float16,fp8,0,0.9321866830190023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,0,1,fp8,fp8,0,0.8941226800282797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,0,1,float16,float16,0,0.5237493515014648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,128,1,float16,fp8,0,0.50163201491038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,128,1,fp8,fp8,0,0.49455467859903973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,0,1,float16,fp8,0,0.5123680035273234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,128,1,float16,float16,0,0.46482666333516437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,0,1,fp8,fp8,0,0.5027679999669393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,0,1,float16,float16,0,0.4721440076828003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,128,1,fp8,fp8,0,0.4400159915288289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,128,1,float16,fp8,0,0.4657546679178874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,0,1,float16,fp8,0,0.47277867794036865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,0,1,fp8,fp8,0,0.4453013340632121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,128,1,float16,float16,0,0.46749866008758545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,0,1,float16,float16,0,0.47590935230255127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,128,1,float16,fp8,0,0.4694186846415202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,128,1,fp8,fp8,0,0.4437919855117798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,0,1,float16,fp8,0,0.4756693442662557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,0,1,fp8,fp8,0,0.44954665501912433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,128,1,float16,float16,0,0.47303466002146405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,0,1,float16,float16,0,0.4795893430709839
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,128,1,float16,fp8,0,0.4715733528137207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,128,1,fp8,fp8,0,0.448304017384847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,128,1,float16,float16,0,0.263973335425059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,0,1,fp8,fp8,0,0.4538559913635254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,0,1,float16,fp8,0,0.47992531458536786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,0,1,float16,float16,0,0.2705013354619344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,128,1,float16,fp8,0,0.2606933315594991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,128,1,fp8,fp8,0,0.2590186595916748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,0,1,float16,fp8,0,0.26543466250101727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,0,1,fp8,fp8,0,0.26125333706537884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,128,1,float16,float16,0,0.24231467644373575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,0,1,float16,float16,0,0.24645866950352988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,128,1,fp8,fp8,0,0.2287893295288086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,128,1,float16,fp8,0,0.24181334177652994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,0,1,float16,fp8,0,0.24652800957361856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,0,1,fp8,fp8,0,0.2332586646080017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,128,1,float16,float16,0,0.24305599927902222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,0,1,float16,float16,0,0.24756266673405966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,128,1,float16,fp8,0,0.24552534023920694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,128,1,fp8,fp8,0,0.23482133944829306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,0,1,float16,fp8,0,0.24768533309300741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,0,1,fp8,fp8,0,0.23741867144902548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,128,1,float16,float16,0,0.24646933873494467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,0,1,float16,float16,0,0.24978667497634888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,128,1,float16,fp8,0,0.246778666973114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,128,1,fp8,fp8,0,0.23641600211461386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,128,1,float16,float16,0,0.14492799838383993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,0,1,fp8,fp8,0,0.23720000187555948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,0,1,float16,fp8,0,0.24980266888936362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,0,1,float16,float16,0,0.14892799655596414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,128,1,float16,fp8,0,0.1439626713593801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,128,1,fp8,fp8,0,0.14404799540837607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,0,1,fp8,fp8,0,0.1460693379243215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,0,1,float16,fp8,0,0.14571733276049295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,128,1,float16,float16,0,0.129530668258667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,0,1,float16,float16,0,0.13172800342241922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,128,1,float16,fp8,0,0.12972799936930338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,128,1,fp8,fp8,0,0.12474667032559712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,0,1,float16,fp8,0,0.1311946709950765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,0,1,fp8,fp8,0,0.12546666463216147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,128,1,float16,float16,0,0.1301866670449575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,0,1,float16,float16,0,0.13319999972979227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,128,1,float16,fp8,0,0.13011733690897623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,128,1,fp8,fp8,0,0.12612799803415933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,0,1,float16,fp8,0,0.13275200128555298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,0,1,fp8,fp8,0,0.12787200013796488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,128,1,float16,float16,0,0.13085866967837015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,0,1,float16,float16,0,0.13314666350682577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,128,1,float16,fp8,0,0.1322773297627767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,128,1,fp8,fp8,0,0.12763733665148416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,0,1,float16,fp8,0,0.13411200046539307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,0,1,fp8,fp8,0,0.12892267107963562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,128,1,float16,float16,0,0.08015466729799907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,0,1,float16,float16,0,0.07976533472537994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,128,1,float16,fp8,0,0.07904533545176189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,128,1,fp8,fp8,0,0.08108266691366832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,0,1,float16,fp8,0,0.07876800000667572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,128,1,float16,float16,0,0.07396799822648366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,0,1,fp8,fp8,0,0.08132799963156383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,0,1,float16,float16,0,0.07267733414967854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,128,1,float16,fp8,0,0.07262933254241943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,128,1,fp8,fp8,0,0.06834133466084798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,0,1,float16,fp8,0,0.07352533439795177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,0,1,fp8,fp8,0,0.07011733452479045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,128,1,float16,float16,0,0.07275733351707458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,0,1,float16,float16,0,0.0740479975938797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,128,1,float16,fp8,0,0.07459199925263722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,128,1,fp8,fp8,0,0.06958933174610138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,0,1,float16,fp8,0,0.07413333157698314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,0,1,fp8,fp8,0,0.07054933408896129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,128,1,float16,float16,0,0.07468266785144806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,0,1,float16,float16,0,0.07434133191903432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,128,1,float16,fp8,0,0.07464000085989635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,128,1,fp8,fp8,0,0.07021333277225494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,0,1,float16,fp8,0,0.07482133309046428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,0,1,fp8,fp8,0,0.07067200044790904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,128,1,float16,float16,0,0.04784533381462097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,0,1,float16,float16,0,0.047914668917655945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,128,1,float16,fp8,0,0.04807466765244802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,128,1,fp8,fp8,0,0.04586133360862732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,0,1,float16,fp8,0,0.0462719996770223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,0,1,fp8,fp8,0,0.04604800045490265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,128,1,float16,float16,0,0.04568533102671305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,0,1,float16,float16,0,0.04582933088143667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,128,1,float16,fp8,0,0.04596266647179922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,0,1,float16,fp8,0,0.0458133320013682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,128,1,fp8,fp8,0,0.04376000165939331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,0,1,fp8,fp8,0,0.043925335009892784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,128,1,float16,float16,0,0.04693333307902018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,0,1,float16,float16,0,0.047279998660087585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,128,1,float16,fp8,0,0.045706664522488914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,128,1,fp8,fp8,0,0.043968002001444496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,0,1,float16,fp8,0,0.046575998266537987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,0,1,fp8,fp8,0,0.043552001317342125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,128,1,float16,float16,0,0.04726399978001913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,0,1,float16,float16,0,0.045882667104403176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,128,1,float16,fp8,0,0.045791998505592346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,128,1,fp8,fp8,0,0.044079999128977455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,0,1,float16,fp8,0,0.04597333570321401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,0,1,fp8,fp8,0,0.0435146689414978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,128,1,float16,float16,0,0.03383466601371765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,0,1,float16,float16,0,0.03352533280849457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,128,1,float16,fp8,0,0.035504000882307686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,128,1,fp8,fp8,0,0.033530667424201965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,0,1,float16,fp8,0,0.03442666679620743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,0,1,fp8,fp8,0,0.033471999069054924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,128,1,float16,float16,0,0.03365866591533025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,0,1,float16,float16,0,0.03462400039037069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,128,1,float16,fp8,0,0.033813332517941795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,128,1,fp8,fp8,0,0.033413333197434746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,0,1,float16,fp8,0,0.03495999922355016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,0,1,fp8,fp8,0,0.03306666761636734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,128,1,float16,float16,0,0.035018667578697205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,0,1,float16,float16,0,0.03499733408292135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,128,1,float16,fp8,0,0.033333333830038704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,128,1,fp8,fp8,0,0.03338133295377096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,0,1,float16,fp8,0,0.03533333291610082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,0,1,fp8,fp8,0,0.03161599983771642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,128,1,float16,float16,0,0.03475733349720637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,0,1,float16,float16,0,0.033728001018365227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,128,1,float16,fp8,0,0.033610666791598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,128,1,fp8,fp8,0,0.03178133318821589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,0,1,float16,fp8,0,0.03368533402681351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,0,1,fp8,fp8,0,0.031386665999889374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,128,1,float16,float16,0,0.025349333882331848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,0,1,float16,float16,0,0.025424001117547352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,128,1,float16,fp8,0,0.025402667621771496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,128,1,fp8,fp8,0,0.025610665480295818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,0,1,float16,fp8,0,0.025557334224383037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,0,1,fp8,fp8,0,0.025797332326571148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,128,1,float16,float16,0,0.02532800038655599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,0,1,float16,float16,0,0.025600001215934753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,128,1,float16,fp8,0,0.025093334416548412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,128,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,0,1,float16,fp8,0,0.026474667092164356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,0,1,fp8,fp8,0,0.02425066630045573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,128,1,float16,float16,0,0.02515200028816859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,0,1,float16,float16,0,0.025407999753952026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,128,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,128,1,fp8,fp8,0,0.0234400009115537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,0,1,float16,fp8,0,0.025583999852339428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,0,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,128,1,float16,float16,0,0.025029333929220837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,0,1,float16,float16,0,0.025685332715511322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,128,1,float16,fp8,0,0.025493333737055462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,128,1,fp8,fp8,0,0.025072000920772552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,0,1,float16,fp8,0,0.02699200063943863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,0,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,128,1,float16,float16,0,0.8388640085856119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,0,1,float16,float16,0,0.8205866813659668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,128,1,float16,fp8,0,0.8357439835866293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,128,1,fp8,fp8,0,0.781925360361735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,0,1,fp8,fp8,0,0.7593920230865479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,0,1,float16,fp8,0,0.818015972773234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,128,1,float16,float16,0,0.8442880312601725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,0,1,float16,float16,0,0.8237013022104899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,128,1,float16,fp8,0,0.8400213718414307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,128,1,fp8,fp8,0,0.7911199728647867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,0,1,float16,fp8,0,0.8234879970550537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,0,1,fp8,fp8,0,0.7696959972381592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,128,1,float16,float16,0,0.85099196434021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,0,1,float16,float16,0,0.8340480327606201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,128,1,fp8,fp8,0,0.8339946269989014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,128,1,float16,fp8,0,0.8479946454366049
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,0,1,float16,fp8,0,0.8293973604838053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,128,1,float16,float16,0,0.47601068019866943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,0,1,float16,float16,0,0.4678666591644287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,128,1,float16,fp8,0,0.46534399191538495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,0,1,fp8,fp8,0,0.8141866525014242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,128,1,fp8,fp8,0,0.45812801520029706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,0,1,float16,fp8,0,0.4557439883550008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,0,1,fp8,fp8,0,0.4488106568654378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,128,1,float16,float16,0,0.4247359832127889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,0,1,float16,float16,0,0.41605865955352783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,128,1,fp8,fp8,0,0.39954666296641034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,128,1,float16,fp8,0,0.4238239924112956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,0,1,float16,fp8,0,0.41549865404764813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,0,1,fp8,fp8,0,0.39046935240427655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,0,1,float16,float16,0,0.4191253185272217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,128,1,float16,float16,0,0.4283733367919922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,128,1,float16,fp8,0,0.42977599302927655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,128,1,fp8,fp8,0,0.40456533432006836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,0,1,float16,fp8,0,0.4184906482696533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,0,1,fp8,fp8,0,0.3954879840215047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,128,1,float16,float16,0,0.4333866834640503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,0,1,float16,float16,0,0.42580266793568927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,128,1,float16,fp8,0,0.43270401159922284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,128,1,fp8,fp8,0,0.411296010017395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,0,1,float16,fp8,0,0.4222506682078044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,0,1,fp8,fp8,0,0.4000106652577718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,128,1,float16,float16,0,0.2449386715888977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,0,1,float16,float16,0,0.24086399873097739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,128,1,float16,fp8,0,0.24076267083485922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,128,1,fp8,fp8,0,0.2387733260790507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,0,1,float16,fp8,0,0.2364799976348877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,0,1,fp8,fp8,0,0.23425600926081339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,0,1,float16,float16,0,0.21598933140436807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,128,1,float16,float16,0,0.22161600987116495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,128,1,float16,fp8,0,0.2217280069986979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,128,1,fp8,fp8,0,0.20863467454910278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,0,1,float16,fp8,0,0.21725332736968994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,0,1,fp8,fp8,0,0.2047146757443746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,128,1,float16,float16,0,0.22408533096313477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,0,1,float16,float16,0,0.21850667397181192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,128,1,float16,fp8,0,0.22263999780019125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,128,1,fp8,fp8,0,0.21440533796946207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,0,1,float16,fp8,0,0.21876800060272217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,0,1,fp8,fp8,0,0.20819199085235596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,128,1,float16,float16,0,0.22522666056950888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,0,1,float16,float16,0,0.22054400046666464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,128,1,float16,fp8,0,0.22526399294535318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,128,1,fp8,fp8,0,0.21507734060287476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,0,1,float16,fp8,0,0.22039467096328735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,0,1,fp8,fp8,0,0.2097919980684916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,128,1,float16,float16,0,0.13593066732088724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,0,1,float16,float16,0,0.13247999548912048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,128,1,float16,fp8,0,0.13408000270525613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,128,1,fp8,fp8,0,0.13381333152453104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,0,1,float16,fp8,0,0.130021333694458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,0,1,fp8,fp8,0,0.13153066237767538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,128,1,float16,float16,0,0.12018133203188579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,0,1,float16,float16,0,0.1179253359635671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,128,1,float16,fp8,0,0.12057600418726604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,128,1,fp8,fp8,0,0.11397332946459453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,0,1,float16,fp8,0,0.11801600456237793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,0,1,fp8,fp8,0,0.11129599809646606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,128,1,float16,float16,0,0.12037866314252217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,0,1,float16,float16,0,0.11946666240692139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,128,1,float16,fp8,0,0.12147200107574463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,128,1,fp8,fp8,0,0.11570666233698527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,0,1,float16,fp8,0,0.11927466591199239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,0,1,fp8,fp8,0,0.11223466197649638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,0,1,float16,float16,0,0.11983467141787212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,128,1,float16,float16,0,0.12298132975896199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,128,1,float16,fp8,0,0.12218667070070903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,128,1,fp8,fp8,0,0.11752532919247945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,0,1,float16,fp8,0,0.12016533811887105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,128,1,float16,float16,0,0.07473599910736084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,0,1,float16,float16,0,0.07302933434645335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,0,1,fp8,fp8,0,0.11529599626859029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,128,1,float16,fp8,0,0.07454399764537811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,0,1,float16,fp8,0,0.07417599856853485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,128,1,fp8,fp8,0,0.07864533364772797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,128,1,float16,float16,0,0.06889600058396657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,0,1,fp8,fp8,0,0.07696000238259633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,0,1,float16,float16,0,0.0684853345155716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,128,1,float16,fp8,0,0.0703893353541692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,128,1,fp8,fp8,0,0.06449600060780843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,0,1,float16,fp8,0,0.06859200199445088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,0,1,fp8,fp8,0,0.06308799982070923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,128,1,float16,float16,0,0.0695360004901886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,0,1,float16,float16,0,0.06788266698519389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,128,1,float16,fp8,0,0.07044800122578938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,128,1,fp8,fp8,0,0.06760000189145406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,0,1,float16,fp8,0,0.0681386689345042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,0,1,fp8,fp8,0,0.06407999992370605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,128,1,float16,float16,0,0.0708426684141159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,0,1,float16,float16,0,0.06878399848937988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,128,1,float16,fp8,0,0.07011199990908305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,128,1,fp8,fp8,0,0.06630399823188782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,0,1,float16,fp8,0,0.0683840016523997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,0,1,fp8,fp8,0,0.06486933430035909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,128,1,float16,float16,0,0.04563199977080027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,0,1,float16,float16,0,0.045466666420300804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,128,1,float16,fp8,0,0.0458186666170756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,128,1,fp8,fp8,0,0.04533866544564565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,0,1,float16,fp8,0,0.04560000201066335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,0,1,fp8,fp8,0,0.043680002291997276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,128,1,float16,float16,0,0.04373333354791006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,0,1,float16,float16,0,0.043807998299598694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,128,1,float16,fp8,0,0.045509333411852516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,128,1,fp8,fp8,0,0.041663999358812966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,0,1,float16,fp8,0,0.043749332427978516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,0,1,fp8,fp8,0,0.04014399896065394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,128,1,float16,float16,0,0.0458133320013682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,0,1,float16,float16,0,0.04366933306058248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,128,1,float16,fp8,0,0.04576533536116282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,128,1,fp8,fp8,0,0.041589332123597465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,0,1,float16,fp8,0,0.04469866553942362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,0,1,fp8,fp8,0,0.04091199984153112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,128,1,float16,float16,0,0.045834665497144066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,0,1,float16,float16,0,0.04387733340263367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,128,1,float16,fp8,0,0.04364799956480662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,128,1,fp8,fp8,0,0.043680002291997276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,0,1,float16,fp8,0,0.04358399907747904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,0,1,fp8,fp8,0,0.04178133110205332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,128,1,float16,float16,0,0.03363733241955439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,0,1,float16,float16,0,0.031557333966096245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,128,1,float16,fp8,0,0.03363733241955439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,128,1,fp8,fp8,0,0.03129599988460541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,0,1,float16,fp8,0,0.031914666295051575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,0,1,fp8,fp8,0,0.029493334392706554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,128,1,float16,float16,0,0.03162133445342382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,0,1,float16,float16,0,0.031632001201311745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,128,1,float16,fp8,0,0.03138133386770884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,128,1,fp8,fp8,0,0.031141333281993866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,0,1,float16,fp8,0,0.03154666721820831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,0,1,fp8,fp8,0,0.02951466788848241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,128,1,float16,float16,0,0.03223466624816259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,0,1,float16,float16,0,0.0312266672650973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,128,1,float16,fp8,0,0.033370666205883026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,128,1,fp8,fp8,0,0.029669334491093952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,0,1,float16,fp8,0,0.03152533372243246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,0,1,fp8,fp8,0,0.029333333174387615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,128,1,float16,float16,0,0.03154666721820831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,0,1,float16,float16,0,0.0315733328461647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,128,1,float16,fp8,0,0.03305066625277201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,128,1,fp8,fp8,0,0.029690665503342945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,0,1,float16,fp8,0,0.03160000095764796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,0,1,fp8,fp8,0,0.029264000554879505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,128,1,float16,float16,0,0.0233599990606308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,0,1,float16,float16,0,0.02329600105683009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,128,1,float16,fp8,0,0.02313599983851115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,128,1,fp8,fp8,0,0.023930666347344715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,0,1,float16,fp8,0,0.02330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,0,1,fp8,fp8,0,0.02306666721900304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,128,1,float16,float16,0,0.023914667467276256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,0,1,float16,float16,0,0.023189333577950794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,128,1,float16,fp8,0,0.02348800003528595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,128,1,fp8,fp8,0,0.023354666928450268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,0,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,0,1,fp8,fp8,0,0.02309333284695943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,128,1,float16,float16,0,0.023498666783173878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,0,1,float16,float16,0,0.022991999983787537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,128,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,128,1,fp8,fp8,0,0.022709332406520844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,0,1,float16,fp8,0,0.023285334308942158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,0,1,fp8,fp8,0,0.023120000958442688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,128,1,float16,float16,0,0.023376000424226124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,0,1,float16,float16,0,0.023775999744733173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,128,1,float16,fp8,0,0.02349333216746648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,128,1,fp8,fp8,0,0.021407999098300934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,0,1,float16,fp8,0,0.023525332411130268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,0,1,fp8,fp8,0,0.02184533327817917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,128,1,float16,float16,0,0.021045332153638203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,0,1,float16,float16,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,128,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,0,1,float16,fp8,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,0,1,fp8,fp8,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,128,1,float16,float16,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,0,1,float16,float16,0,0.01942933350801468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,128,1,float16,fp8,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,128,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,0,1,float16,fp8,0,0.01961600035429001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,128,1,float16,float16,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,0,1,float16,float16,0,0.02094399929046631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,128,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,128,1,fp8,fp8,0,0.020949333906173706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,0,1,float16,fp8,0,0.0215786670645078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,128,1,float16,float16,0,0.021541332205136616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,0,1,float16,float16,0,0.019434666881958645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,128,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,128,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,0,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,128,1,float16,float16,0,0.45257067680358887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,0,1,float16,float16,0,0.4516853491465251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,128,1,float16,fp8,0,0.45014933745066327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,128,1,fp8,fp8,0,0.42556798458099365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,0,1,fp8,fp8,0,0.4267253478368123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,0,1,float16,fp8,0,0.4497386614481608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,128,1,float16,float16,0,0.456218679745992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,0,1,float16,float16,0,0.45632000764211017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,128,1,float16,fp8,0,0.4552053213119507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,128,1,fp8,fp8,0,0.4338826735814412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,0,1,float16,fp8,0,0.454586664835612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,0,1,fp8,fp8,0,0.4340746800104777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,128,1,float16,float16,0,0.4604586760203044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,128,1,float16,fp8,0,0.45827198028564453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,128,1,fp8,fp8,0,0.4397066831588745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,0,1,float16,float16,0,0.4603999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,0,1,float16,fp8,0,0.4588479995727539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,128,1,float16,float16,0,0.257749338944753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,0,1,float16,float16,0,0.259168008963267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,128,1,float16,fp8,0,0.2539520064989726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,0,1,fp8,fp8,0,0.44042134284973145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,128,1,fp8,fp8,0,0.2534986734390259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,0,1,float16,fp8,0,0.2550506591796875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,0,1,fp8,fp8,0,0.2547573248545329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,128,1,float16,float16,0,0.23476266860961914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,0,1,float16,float16,0,0.23468265930811563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,128,1,float16,fp8,0,0.23444799582163492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,128,1,fp8,fp8,0,0.22186134258906046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,0,1,float16,fp8,0,0.23466134071350098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,0,1,fp8,fp8,0,0.22235733270645142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,128,1,float16,float16,0,0.23613866170247397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,0,1,float16,float16,0,0.2359679937362671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,128,1,float16,fp8,0,0.23600532611211142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,128,1,fp8,fp8,0,0.22710933287938437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,0,1,fp8,fp8,0,0.2275786598523458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,0,1,float16,fp8,0,0.23513599236806235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,128,1,float16,float16,0,0.23931199312210083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,0,1,float16,float16,0,0.239029328028361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,128,1,float16,fp8,0,0.23893866936365762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,128,1,fp8,fp8,0,0.2293706734975179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,0,1,float16,fp8,0,0.23882667223612467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,0,1,fp8,fp8,0,0.22961066166559854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,128,1,float16,float16,0,0.14018133282661438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,128,1,float16,fp8,0,0.13657066226005554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,0,1,float16,float16,0,0.13923733433087668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,128,1,fp8,fp8,0,0.13826666275660196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,0,1,float16,fp8,0,0.13664533694585165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,0,1,fp8,fp8,0,0.1379680037498474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,128,1,float16,float16,0,0.12434666355450948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,0,1,float16,float16,0,0.12418133020401001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,128,1,float16,fp8,0,0.1253866652647654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,128,1,fp8,fp8,0,0.11674666404724121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,0,1,float16,fp8,0,0.1251626710096995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,0,1,fp8,fp8,0,0.11734400192896526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,128,1,float16,float16,0,0.12591999769210815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,0,1,float16,float16,0,0.12563199798266092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,128,1,float16,fp8,0,0.1267306705315908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,128,1,fp8,fp8,0,0.11949867010116577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,0,1,float16,fp8,0,0.12743467092514038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,0,1,fp8,fp8,0,0.12045866250991821
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,128,1,float16,float16,0,0.12650133172671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,0,1,float16,float16,0,0.12774399916330972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,128,1,float16,fp8,0,0.1276800036430359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,128,1,fp8,fp8,0,0.1216266651948293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,128,1,float16,float16,0,0.07497600217660268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,0,1,float16,fp8,0,0.12801067034403482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,0,1,fp8,fp8,0,0.12343999743461609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,0,1,float16,float16,0,0.07507200042406718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,128,1,float16,fp8,0,0.07458133498827617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,128,1,fp8,fp8,0,0.07851733267307281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,0,1,float16,fp8,0,0.07576533158620198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,0,1,fp8,fp8,0,0.07786133388678233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,128,1,float16,float16,0,0.0703359991312027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,0,1,float16,float16,0,0.07051733136177063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,128,1,float16,fp8,0,0.07042666773001353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,128,1,fp8,fp8,0,0.06573333342870076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,0,1,float16,fp8,0,0.07022400200366974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,0,1,fp8,fp8,0,0.06568000217278798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,128,1,float16,float16,0,0.07042666773001353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,0,1,float16,float16,0,0.07045866549015045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,128,1,float16,fp8,0,0.07081600030263265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,128,1,fp8,fp8,0,0.06670933465162913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,0,1,fp8,fp8,0,0.0665226678053538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,128,1,float16,float16,0,0.07116800049940745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,0,1,float16,float16,0,0.07155199845631917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,0,1,float16,fp8,0,0.07122133175532024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,128,1,float16,fp8,0,0.07087466617425282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,128,1,fp8,fp8,0,0.06648000081380208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,0,1,float16,fp8,0,0.0705386648575465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,0,1,fp8,fp8,0,0.06733866532643636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,128,1,float16,float16,0,0.045893331368764244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,0,1,float16,float16,0,0.043840001026789345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,128,1,float16,fp8,0,0.04388799766699473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,0,1,float16,fp8,0,0.04563733438650767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,128,1,fp8,fp8,0,0.04435733457406362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,0,1,fp8,fp8,0,0.04372799893220266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,128,1,float16,float16,0,0.043765331308046974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,0,1,float16,float16,0,0.04275733232498169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,128,1,float16,fp8,0,0.04351999859015147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,128,1,fp8,fp8,0,0.0415786678592364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,0,1,float16,fp8,0,0.04385599990685781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,0,1,fp8,fp8,0,0.04124266654253006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,128,1,float16,float16,0,0.04363733530044556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,0,1,float16,float16,0,0.044122666120529175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,128,1,float16,fp8,0,0.043866669138272606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,128,1,fp8,fp8,0,0.04167999823888143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,0,1,float16,fp8,0,0.04369066655635834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,0,1,fp8,fp8,0,0.041946664452552795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,128,1,float16,float16,0,0.0447626660267512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,0,1,float16,float16,0,0.04358933369318644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,128,1,float16,fp8,0,0.043866669138272606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,128,1,fp8,fp8,0,0.04243200023969015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,0,1,float16,fp8,0,0.04383466641108195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,128,1,float16,float16,0,0.03151999910672506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,0,1,fp8,fp8,0,0.04301333427429199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,0,1,float16,float16,0,0.03068800022204717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,128,1,float16,fp8,0,0.03124266614516576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,128,1,fp8,fp8,0,0.02934933453798294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,0,1,float16,fp8,0,0.03163733333349228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,0,1,fp8,fp8,0,0.03136000037193298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,128,1,float16,float16,0,0.02995733420054118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,0,1,float16,float16,0,0.02940266579389572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,128,1,float16,fp8,0,0.03148266673088074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,128,1,fp8,fp8,0,0.028234665592511494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,0,1,float16,fp8,0,0.03133866687615713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,0,1,fp8,fp8,0,0.029146666328112285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,128,1,float16,float16,0,0.031712000568707786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,0,1,float16,float16,0,0.0313226655125618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,128,1,float16,fp8,0,0.029472000896930695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,128,1,fp8,fp8,0,0.0295413335164388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,0,1,float16,fp8,0,0.031504000226656594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,0,1,fp8,fp8,0,0.029018667836983997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,128,1,float16,float16,0,0.031658666829268135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,0,1,float16,float16,0,0.031370667119820915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,128,1,float16,fp8,0,0.031354665756225586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,128,1,fp8,fp8,0,0.029493334392706554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,0,1,float16,fp8,0,0.029711998999118805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,0,1,fp8,fp8,0,0.029487999776999157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,128,1,float16,float16,0,0.02367466688156128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,0,1,float16,float16,0,0.023584000766277313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,128,1,float16,fp8,0,0.02332799881696701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,128,1,fp8,fp8,0,0.02348800003528595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,0,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,128,1,float16,float16,0,0.02350933353106181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,0,1,float16,float16,0,0.023546665906906128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,128,1,float16,fp8,0,0.023061332603295643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,128,1,fp8,fp8,0,0.02330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,0,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,0,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,128,1,float16,float16,0,0.023413332800070446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,0,1,float16,float16,0,0.023285334308942158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,128,1,float16,fp8,0,0.0252960001428922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,128,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,0,1,float16,fp8,0,0.023397333920001984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,0,1,fp8,fp8,0,0.02319466571013133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,128,1,float16,float16,0,0.02333866556485494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,0,1,float16,float16,0,0.023130667706330616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,128,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,128,1,fp8,fp8,0,0.023061332603295643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,0,1,float16,fp8,0,0.025248001019159954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,0,1,fp8,fp8,0,0.023610666394233704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,128,1,float16,float16,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,0,1,float16,float16,0,0.01924266666173935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,128,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,128,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,128,1,float16,float16,0,0.017535999417304993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,128,1,fp8,fp8,0,0.01754666616519292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,0,1,float16,fp8,0,0.017818666994571686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,128,1,float16,float16,0,0.01897066707412402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,0,1,float16,float16,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,128,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,128,1,fp8,fp8,0,0.018794666975736618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,0,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,0,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,128,1,float16,float16,0,0.01897066707412402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,128,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,128,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,128,1,float16,float16,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,128,1,float16,fp8,0,0.018917333334684372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,128,1,float16,float16,0,0.01747200017174085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,128,1,float16,fp8,0,0.017770666629076004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,128,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,128,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,0,1,float16,float16,0,0.01762666677435239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,128,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,128,1,float16,float16,0,0.32049065828323364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,0,1,float16,float16,0,0.3202613393465678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,128,1,float16,fp8,0,0.3203999996185303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,128,1,fp8,fp8,0,0.2978026668230693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,0,1,fp8,fp8,0,0.2980159918467204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,0,1,float16,fp8,0,0.32046933968861896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,0,1,float16,float16,0,0.3230453332265218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,128,1,fp8,fp8,0,0.30291734139124554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,128,1,float16,fp8,0,0.32266666491826373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,128,1,float16,float16,0,0.32417599360148114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,0,1,fp8,fp8,0,0.30340800682703656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,0,1,float16,fp8,0,0.32289065917332965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,128,1,float16,float16,0,0.32494932413101196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,0,1,float16,float16,0,0.3244746724764506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,128,1,fp8,fp8,0,0.3044640024503072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,128,1,float16,fp8,0,0.3240533272425334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,128,1,float16,float16,0,0.18209065993626913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,0,1,float16,fp8,0,0.3237920006116231
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,0,1,fp8,fp8,0,0.3048373262087504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,0,1,float16,float16,0,0.1811573306719462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,128,1,float16,fp8,0,0.18016533056894937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,128,1,fp8,fp8,0,0.1760800083478292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,0,1,float16,fp8,0,0.1792479952176412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,128,1,float16,float16,0,0.16876800855000815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,0,1,fp8,fp8,0,0.17670933405558267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,0,1,float16,float16,0,0.1699893275896708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,128,1,fp8,fp8,0,0.15558933218320212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,128,1,float16,fp8,0,0.16886399189631143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,0,1,float16,fp8,0,0.16942399740219116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,0,1,fp8,fp8,0,0.15527466932932535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,128,1,float16,float16,0,0.16892266273498535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,0,1,float16,float16,0,0.16981865962346396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,128,1,float16,fp8,0,0.16932799418767294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,128,1,fp8,fp8,0,0.15713600317637125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,0,1,float16,fp8,0,0.16885334253311157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,0,1,fp8,fp8,0,0.1574026644229889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,128,1,float16,float16,0,0.17073599497477213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,0,1,float16,float16,0,0.1710666616757711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,128,1,float16,fp8,0,0.17105066776275635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,128,1,fp8,fp8,0,0.16064000129699707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,0,1,float16,fp8,0,0.17112533251444498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,128,1,float16,float16,0,0.09710933764775594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,0,1,fp8,fp8,0,0.16062933206558228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,0,1,float16,float16,0,0.097461332877477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,128,1,float16,fp8,0,0.09711466232935588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,128,1,fp8,fp8,0,0.09719467163085938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,0,1,float16,fp8,0,0.09730666875839233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,0,1,fp8,fp8,0,0.09670933087666829
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,128,1,float16,float16,0,0.09084266424179077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,0,1,float16,float16,0,0.0909440020720164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,128,1,float16,fp8,0,0.0906880001227061
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,128,1,fp8,fp8,0,0.08389866352081299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,0,1,float16,fp8,0,0.09066667159398396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,0,1,fp8,fp8,0,0.08482666810353597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,128,1,float16,float16,0,0.09082133571306865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,0,1,float16,float16,0,0.09100799759229024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,128,1,float16,fp8,0,0.09127466877301534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,128,1,fp8,fp8,0,0.0846453309059143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,0,1,float16,fp8,0,0.09104532996813457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,0,1,fp8,fp8,0,0.08528533577919006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,128,1,float16,float16,0,0.09117333094278972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,128,1,float16,fp8,0,0.0925333301226298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,0,1,float16,float16,0,0.09221333265304565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,128,1,fp8,fp8,0,0.08547733227411906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,0,1,float16,fp8,0,0.0913759966691335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,0,1,fp8,fp8,0,0.08574933807055156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,128,1,float16,float16,0,0.05648000041643778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,0,1,float16,float16,0,0.05606933434804281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,128,1,float16,fp8,0,0.05717866619427999
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,128,1,fp8,fp8,0,0.054101333022117615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,0,1,fp8,fp8,0,0.05388799806435903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,128,1,float16,float16,0,0.05243733525276184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,0,1,float16,fp8,0,0.05470933516820272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,0,1,float16,float16,0,0.054010664423306785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,128,1,float16,fp8,0,0.053301334381103516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,128,1,fp8,fp8,0,0.04976533353328705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,0,1,float16,fp8,0,0.05372266471385956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,0,1,fp8,fp8,0,0.04979733129342397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,0,1,float16,float16,0,0.05382933219273885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,128,1,float16,float16,0,0.053173333406448364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,128,1,fp8,fp8,0,0.050101334849993386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,128,1,float16,fp8,0,0.05399466554323832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,0,1,float16,fp8,0,0.05377600093682607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,0,1,fp8,fp8,0,0.052144000927607216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,128,1,float16,float16,0,0.05397866666316986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,0,1,float16,float16,0,0.05367999772230784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,128,1,float16,fp8,0,0.05411200225353241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,128,1,fp8,fp8,0,0.051882664362589516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,0,1,float16,fp8,0,0.05393599967161814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,0,1,fp8,fp8,0,0.051962668697039284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,128,1,float16,float16,0,0.03633599976698557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,0,1,float16,float16,0,0.03573333223660787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,128,1,float16,fp8,0,0.035786665976047516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,128,1,fp8,fp8,0,0.03573866685231527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,0,1,float16,fp8,0,0.03576533248027166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,0,1,fp8,fp8,0,0.0334346666932106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,128,1,float16,float16,0,0.03541333228349686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,0,1,float16,float16,0,0.03573333223660787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,128,1,float16,fp8,0,0.035487999518712364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,128,1,fp8,fp8,0,0.033402666449546814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,0,1,float16,fp8,0,0.03565866748491923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,0,1,fp8,fp8,0,0.0335413341720899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,128,1,float16,float16,0,0.03409600009520849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,0,1,float16,float16,0,0.03532266616821289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,128,1,float16,fp8,0,0.03535466641187668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,128,1,fp8,fp8,0,0.0336053321758906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,0,1,float16,fp8,0,0.03473600000143051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,0,1,fp8,fp8,0,0.03364266703526179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,128,1,float16,float16,0,0.03603200117746989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,0,1,float16,float16,0,0.035445332527160645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,128,1,float16,fp8,0,0.035317334036032356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,128,1,fp8,fp8,0,0.03363200028737386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,0,1,float16,fp8,0,0.03569599986076355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,0,1,fp8,fp8,0,0.033717334270477295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,128,1,float16,float16,0,0.027189334233601887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,0,1,float16,float16,0,0.027317332724730175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,128,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,128,1,fp8,fp8,0,0.02718399961789449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,0,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,0,1,fp8,fp8,0,0.02644266684850057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,128,1,float16,float16,0,0.025205334027608235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,0,1,float16,float16,0,0.025306666890780132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,128,1,float16,fp8,0,0.02531733363866806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,128,1,fp8,fp8,0,0.025226667523384094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,0,1,float16,fp8,0,0.025519999365011852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,0,1,fp8,fp8,0,0.02515733242034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,128,1,float16,float16,0,0.025114665428797405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,0,1,float16,float16,0,0.025775998830795288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,128,1,float16,fp8,0,0.02646933247645696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,128,1,fp8,fp8,0,0.02513066679239273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,0,1,float16,fp8,0,0.025568000972270966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,0,1,fp8,fp8,0,0.025146665672461193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,128,1,float16,float16,0,0.025349333882331848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,0,1,float16,float16,0,0.02628266563018163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,128,1,float16,fp8,0,0.025701334079106648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,128,1,fp8,fp8,0,0.025616000096003216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,0,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,0,1,fp8,fp8,0,0.025231999655564625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,128,1,float16,float16,0,0.021040000021457672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,0,1,float16,float16,0,0.020986666282018025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,128,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,128,1,fp8,fp8,0,0.020960000654061634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,0,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,0,1,fp8,fp8,0,0.01947733387351036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,128,1,float16,float16,0,0.021055998901526134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,0,1,float16,float16,0,0.021274665991465252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,128,1,float16,fp8,0,0.020960000654061634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,128,1,fp8,fp8,0,0.020970667401949566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,0,1,float16,fp8,0,0.02145066608985265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,128,1,float16,float16,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,128,1,float16,fp8,0,0.021040000021457672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,128,1,fp8,fp8,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,0,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,0,1,fp8,fp8,0,0.020949333906173706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,128,1,float16,float16,0,0.021551998953024547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,0,1,float16,float16,0,0.020949333906173706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,128,1,float16,fp8,0,0.020746666938066483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,128,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,0,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,128,1,float16,float16,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,128,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,128,1,float16,float16,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,128,1,float16,fp8,0,0.017498667041460674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,128,1,float16,float16,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,128,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,128,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,128,1,float16,float16,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,128,1,float16,float16,0,0.015471999843915304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,128,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,0,1,float16,fp8,0,0.01815466706951459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,128,1,float16,float16,0,0.015466666469971338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,128,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,128,1,float16,float16,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,128,1,fp8,fp8,0,0.017743999759356182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,128,1,float16,float16,0,0.25882667303085327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,128,1,float16,fp8,0,0.2576319972674052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,0,1,float16,float16,0,0.25777600208918255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,128,1,fp8,fp8,0,0.23456533749898276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,0,1,float16,fp8,0,0.2574346661567688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,0,1,fp8,fp8,0,0.2339093287785848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,128,1,float16,float16,0,0.2595786650975545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,0,1,float16,float16,0,0.2592373291651408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,128,1,float16,fp8,0,0.25872000058492023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,128,1,fp8,fp8,0,0.2365973393122355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,0,1,float16,fp8,0,0.2574186722437541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,0,1,fp8,fp8,0,0.23669334252675375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,128,1,float16,float16,0,0.2593280076980591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,0,1,float16,float16,0,0.25859200954437256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,128,1,float16,fp8,0,0.2595466574033101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,128,1,fp8,fp8,0,0.23873066902160645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,0,1,float16,fp8,0,0.2592693368593852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,128,1,float16,float16,0,0.14057067036628723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,0,1,float16,float16,0,0.14071999986966452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,0,1,fp8,fp8,0,0.2399946649869283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,128,1,float16,fp8,0,0.140474667151769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,128,1,fp8,fp8,0,0.13514133294423422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,0,1,float16,fp8,0,0.14061866203943887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,0,1,fp8,fp8,0,0.13638933499654135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,128,1,float16,float16,0,0.13384532928466797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,0,1,float16,float16,0,0.1343839963277181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,128,1,float16,fp8,0,0.1362559994061788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,128,1,fp8,fp8,0,0.12223466237386067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,0,1,float16,fp8,0,0.1341386636098226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,0,1,fp8,fp8,0,0.12338667114575703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,128,1,float16,float16,0,0.13471999764442444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,0,1,float16,float16,0,0.1343893309434255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,128,1,fp8,fp8,0,0.12378133336702983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,128,1,float16,fp8,0,0.13518399993578592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,0,1,float16,fp8,0,0.13485866785049438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,0,1,fp8,fp8,0,0.12370666861534119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,128,1,float16,float16,0,0.1362399955590566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,0,1,float16,float16,0,0.13633599877357483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,128,1,float16,fp8,0,0.1342080036799113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,128,1,fp8,fp8,0,0.12502933541933695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,0,1,float16,fp8,0,0.1362986663977305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,128,1,float16,float16,0,0.07683733105659485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,0,1,fp8,fp8,0,0.12370666861534119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,128,1,float16,fp8,0,0.07700266440709432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,0,1,float16,float16,0,0.07483200232187907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,128,1,fp8,fp8,0,0.07256000240643819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,0,1,fp8,fp8,0,0.07100800176461537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,128,1,float16,float16,0,0.0745119998852412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,0,1,float16,fp8,0,0.07458666463692983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,0,1,float16,float16,0,0.0742986649274826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,128,1,fp8,fp8,0,0.0698773314555486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,128,1,float16,fp8,0,0.07394666473070781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,0,1,float16,fp8,0,0.07377600173155467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,0,1,fp8,fp8,0,0.06845333178838094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,128,1,float16,float16,0,0.07478400071461995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,0,1,float16,float16,0,0.0743999977906545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,128,1,float16,fp8,0,0.07435200115044911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,128,1,fp8,fp8,0,0.07025599976380666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,0,1,fp8,fp8,0,0.06972266733646393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,0,1,float16,fp8,0,0.07493333518505096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,128,1,float16,float16,0,0.07437866429487865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,0,1,float16,float16,0,0.07472000022729237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,128,1,float16,fp8,0,0.07483200232187907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,128,1,fp8,fp8,0,0.0708000014225642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,0,1,float16,fp8,0,0.07450133562088013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,0,1,fp8,fp8,0,0.06956799825032552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,0,1,float16,float16,0,0.0459199994802475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,128,1,float16,float16,0,0.047184000412623085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,128,1,float16,fp8,0,0.046122665206591286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,128,1,fp8,fp8,0,0.04375466704368591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,0,1,float16,fp8,0,0.04604800045490265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,0,1,fp8,fp8,0,0.04527466495831808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,128,1,float16,float16,0,0.04435733457406362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,0,1,float16,float16,0,0.04585599899291992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,128,1,float16,fp8,0,0.044922664761543274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,128,1,fp8,fp8,0,0.044079999128977455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,0,1,float16,fp8,0,0.04571199913819631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,0,1,fp8,fp8,0,0.04155733436346054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,128,1,float16,float16,0,0.045034666856129967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,0,1,float16,float16,0,0.04561600089073181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,128,1,float16,fp8,0,0.045834665497144066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,128,1,fp8,fp8,0,0.0415786678592364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,0,1,float16,fp8,0,0.045642669002215065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,0,1,fp8,fp8,0,0.04162666698296865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,128,1,float16,float16,0,0.04578666885693868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,0,1,float16,float16,0,0.0444106658299764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,128,1,float16,fp8,0,0.04593066871166229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,128,1,fp8,fp8,0,0.04377600053946177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,0,1,float16,fp8,0,0.045594667394955955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,0,1,fp8,fp8,0,0.042352000872294106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,128,1,float16,float16,0,0.03166399896144867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,0,1,float16,float16,0,0.03163733333349228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,128,1,float16,fp8,0,0.03142400085926056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,128,1,fp8,fp8,0,0.031445334355036415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,0,1,float16,fp8,0,0.03190399954716364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,0,1,fp8,fp8,0,0.029663999875386555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,0,1,float16,float16,0,0.0313226655125618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,128,1,float16,float16,0,0.029951999584833782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,128,1,float16,fp8,0,0.031231999397277832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,128,1,fp8,fp8,0,0.029781334102153778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,0,1,float16,fp8,0,0.03146133323510488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,0,1,fp8,fp8,0,0.029301332930723827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,128,1,float16,float16,0,0.029861333469549816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,0,1,float16,float16,0,0.03162666658560435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,128,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,128,1,fp8,fp8,0,0.03012266755104065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,0,1,float16,fp8,0,0.030271999537944794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,0,1,fp8,fp8,0,0.029504001140594482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,128,1,float16,float16,0,0.031504000226656594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,0,1,float16,float16,0,0.029605334003766377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,128,1,float16,fp8,0,0.031898667414983116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,128,1,fp8,fp8,0,0.029461334149042766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,0,1,float16,fp8,0,0.03092266619205475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,0,1,fp8,fp8,0,0.031013332307338715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,128,1,float16,float16,0,0.023610666394233704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,0,1,float16,float16,0,0.0229120006163915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,128,1,float16,fp8,0,0.023285334308942158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,128,1,fp8,fp8,0,0.02317333221435547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,0,1,float16,fp8,0,0.02348800003528595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,0,1,fp8,fp8,0,0.022954667607943218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,128,1,float16,float16,0,0.023056000471115112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,0,1,float16,float16,0,0.023103999594847362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,128,1,float16,fp8,0,0.02332799881696701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,128,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,0,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,0,1,fp8,fp8,0,0.0215786670645078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,128,1,float16,float16,0,0.023423999547958374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,0,1,float16,float16,0,0.023168000082174938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,128,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,128,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,0,1,float16,fp8,0,0.023530667026837666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,0,1,fp8,fp8,0,0.023056000471115112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,128,1,float16,float16,0,0.023024000227451324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,0,1,float16,float16,0,0.023215999205907185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,128,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,128,1,fp8,fp8,0,0.023381332556406658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,0,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,0,1,fp8,fp8,0,0.022842665513356526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,128,1,float16,float16,0,0.019440000255902607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,128,1,float16,fp8,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,128,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,0,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,128,1,float16,float16,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,0,1,float16,float16,0,0.018981333822011948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,128,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,0,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,128,1,float16,float16,0,0.018746666610240936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,128,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,128,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,128,1,float16,float16,0,0.019098666807015736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,0,1,float16,float16,0,0.019424000134070713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,128,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,128,1,fp8,fp8,0,0.019424000134070713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,0,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,128,1,float16,float16,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,128,1,fp8,fp8,0,0.015941333025693893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,0,1,fp8,fp8,0,0.016714667280515034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,128,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,128,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,128,1,float16,float16,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,128,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,128,1,float16,float16,0,0.015125333021084467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,128,1,fp8,fp8,0,0.015546667079130808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,128,1,float16,float16,0,0.015210667004187902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,128,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,0,1,fp8,fp8,0,0.015461333096027374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,128,1,float16,fp8,0,0.017583999782800674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,0,1,float16,float16,0,0.01571200042963028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,128,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,0,1,fp8,fp8,0,0.01573333392540614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,128,1,float16,float16,0,0.22430932521820068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,0,1,float16,float16,0,0.22424532969792685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,128,1,float16,fp8,0,0.22503999869028726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,128,1,fp8,fp8,0,0.20363734165827432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,0,1,float16,fp8,0,0.22408000628153482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,0,1,fp8,fp8,0,0.20485333601633707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,128,1,float16,float16,0,0.2232053279876709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,0,1,float16,float16,0,0.22356800238291422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,128,1,fp8,fp8,0,0.20375466346740723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,128,1,float16,fp8,0,0.2256586750348409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,0,1,float16,fp8,0,0.22260799010594687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,0,1,fp8,fp8,0,0.20356800158818564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,0,1,float16,float16,0,0.22431466976801553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,128,1,float16,float16,0,0.22499734163284302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,128,1,float16,fp8,0,0.22433066368103027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,128,1,fp8,fp8,0,0.20360000928243002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,0,1,float16,fp8,0,0.2243679960568746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,128,1,float16,float16,0,0.12005333105723064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,0,1,fp8,fp8,0,0.20444266001383463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,0,1,float16,float16,0,0.1200266679128011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,128,1,float16,fp8,0,0.1200320025285085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,128,1,fp8,fp8,0,0.11146133144696553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,0,1,float16,fp8,0,0.11947199702262878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,0,1,fp8,fp8,0,0.11137066284815471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,128,1,float16,float16,0,0.11755200227101643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,0,1,float16,float16,0,0.11760000387827556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,128,1,float16,fp8,0,0.11777066191037495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,128,1,fp8,fp8,0,0.10809600353240967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,0,1,float16,fp8,0,0.11762133240699768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,0,1,fp8,fp8,0,0.10910399754842122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,128,1,float16,float16,0,0.11755200227101643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,0,1,float16,float16,0,0.11803199847539265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,128,1,float16,fp8,0,0.11806399623552959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,128,1,fp8,fp8,0,0.10929600397745769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,0,1,float16,fp8,0,0.11781866351763408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,0,1,fp8,fp8,0,0.10950932900110881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,128,1,float16,float16,0,0.11754133303960164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,0,1,float16,float16,0,0.11778133114178975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,128,1,float16,fp8,0,0.11769599715868632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,128,1,fp8,fp8,0,0.11116266250610352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,0,1,float16,fp8,0,0.11926933129628499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,0,1,fp8,fp8,0,0.10946666200955708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,128,1,float16,float16,0,0.06700799862543742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,0,1,float16,float16,0,0.06838933130105336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,128,1,fp8,fp8,0,0.06221333146095276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,0,1,float16,fp8,0,0.06790400048096974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,128,1,float16,fp8,0,0.0678773323694865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,0,1,fp8,fp8,0,0.06414400041103363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,128,1,float16,float16,0,0.0662773350874583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,0,1,float16,float16,0,0.06585066517194112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,128,1,fp8,fp8,0,0.06232533355553945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,128,1,float16,fp8,0,0.0670826683441798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,0,1,float16,fp8,0,0.06630933284759521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,0,1,fp8,fp8,0,0.062165334820747375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,128,1,float16,float16,0,0.06621866424878438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,128,1,float16,fp8,0,0.06664533416430156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,0,1,float16,float16,0,0.06687999765078227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,128,1,fp8,fp8,0,0.06223999957243601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,0,1,float16,fp8,0,0.06629333396752675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,0,1,fp8,fp8,0,0.06235733131567637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,0,1,float16,float16,0,0.06628799935181935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,128,1,float16,float16,0,0.06612800061702728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,128,1,float16,fp8,0,0.06653866668542226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,128,1,fp8,fp8,0,0.06238399942715963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,0,1,float16,fp8,0,0.0665280024210612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,0,1,fp8,fp8,0,0.06208533545335134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,128,1,float16,float16,0,0.043162668744723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,0,1,float16,float16,0,0.04179200033346812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,128,1,float16,fp8,0,0.041850666205088295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,128,1,fp8,fp8,0,0.039488000174363456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,0,1,float16,fp8,0,0.04186133543650309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,0,1,fp8,fp8,0,0.039834665755430855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,128,1,float16,float16,0,0.04165333261092504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,0,1,float16,float16,0,0.04165866722663244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,128,1,float16,fp8,0,0.04173333446184794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,128,1,fp8,fp8,0,0.03962666789690653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,0,1,float16,fp8,0,0.04144000013669332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,0,1,fp8,fp8,0,0.03938666731119156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,128,1,float16,float16,0,0.04166933397452036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,128,1,float16,fp8,0,0.04155733436346054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,0,1,float16,float16,0,0.04189866781234741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,128,1,fp8,fp8,0,0.04001600046952566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,0,1,float16,fp8,0,0.041759997606277466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,0,1,fp8,fp8,0,0.03956266740957896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,128,1,float16,float16,0,0.04161600023508072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,0,1,float16,float16,0,0.04156800111134847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,128,1,float16,fp8,0,0.041893333196640015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,128,1,fp8,fp8,0,0.039808000127474465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,0,1,float16,fp8,0,0.041840001940727234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,0,1,fp8,fp8,0,0.03958933303753535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,128,1,float16,float16,0,0.029493334392706554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,0,1,float16,float16,0,0.029071999092896778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,128,1,float16,fp8,0,0.029365333418051403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,128,1,fp8,fp8,0,0.027210667729377747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,0,1,float16,fp8,0,0.027952000498771667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,0,1,fp8,fp8,0,0.027503999571005504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,128,1,float16,float16,0,0.027327999472618103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,0,1,float16,float16,0,0.027823999524116516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,128,1,float16,fp8,0,0.02773866554101308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,128,1,fp8,fp8,0,0.02731200059254964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,0,1,float16,fp8,0,0.029631999631722767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,0,1,fp8,fp8,0,0.027429332335789997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,128,1,float16,float16,0,0.029258665939172108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,0,1,float16,float16,0,0.029450667401154835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,128,1,float16,fp8,0,0.027749332288901012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,128,1,fp8,fp8,0,0.02769600103298823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,0,1,float16,fp8,0,0.02938666691382726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,0,1,fp8,fp8,0,0.02739199995994568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,128,1,float16,float16,0,0.0296426663796107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,128,1,float16,fp8,0,0.029296000798543293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,0,1,float16,float16,0,0.029290666182835896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,128,1,fp8,fp8,0,0.027429332335789997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,0,1,float16,fp8,0,0.02758399893840154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,128,1,float16,float16,0,0.02128533273935318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,0,1,fp8,fp8,0,0.027221334477265675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,0,1,float16,float16,0,0.020986666282018025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,128,1,float16,fp8,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,128,1,fp8,fp8,0,0.021568000316619873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,0,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,0,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,128,1,float16,float16,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,0,1,float16,float16,0,0.021370666722456615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,128,1,fp8,fp8,0,0.02149333308140437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,128,1,float16,fp8,0,0.021541332205136616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,0,1,float16,fp8,0,0.02333866556485494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,0,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,128,1,float16,float16,0,0.022122666239738464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,0,1,float16,float16,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,128,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,128,1,fp8,fp8,0,0.021365332106749218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,0,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,128,1,float16,float16,0,0.02199999988079071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,0,1,float16,float16,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,128,1,float16,fp8,0,0.021429332594076794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,128,1,fp8,fp8,0,0.02144533395767212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,0,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,0,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,128,1,float16,float16,0,0.018885333091020584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,0,1,float16,float16,0,0.018079999834299088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,128,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,128,1,fp8,fp8,0,0.01878400022784869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,0,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,128,1,float16,float16,0,0.019509332875410717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,0,1,float16,float16,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,128,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,128,1,fp8,fp8,0,0.019861333072185516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,0,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,0,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,128,1,float16,float16,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,128,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,128,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,0,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,0,1,fp8,fp8,0,0.018794666975736618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,128,1,float16,float16,0,0.018837332725524902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,0,1,float16,float16,0,0.01903466631968816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,128,1,float16,fp8,0,0.019445333629846573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,128,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,0,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,128,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,128,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,0,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,128,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,0,1,fp8,fp8,0,0.016490666816631954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,128,1,float16,float16,0,0.015418666104475657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,128,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,128,1,float16,float16,0,0.01563199982047081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,128,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,128,1,float16,fp8,0,0.01647466669480006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,128,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,128,1,float16,float16,0,0.015301333119471868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,128,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,128,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,0,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,128,1,float16,float16,0,0.016037333756685257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,128,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,0,1,fp8,fp8,0,0.015605332950750986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,128,1,float16,fp8,0,0.016122666498025257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,0,1,float16,float16,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,0,1,float16,fp8,0,0.016469333320856094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,0,1,fp8,fp8,0,0.01747200017174085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,64,128,1,float16,float16,0,0.19166400035222372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,64,0,1,float16,float16,0,0.191594660282135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,64,128,1,float16,fp8,0,0.19206400712331137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,64,128,1,fp8,fp8,0,0.17523199319839478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,64,0,1,float16,fp8,0,0.19246933857599893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,64,0,1,fp8,fp8,0,0.17543466885884604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,64,128,1,float16,float16,0,0.1914880077044169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,64,0,1,float16,float16,0,0.19171732664108276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,64,128,1,float16,fp8,0,0.1917440096537272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,64,128,1,fp8,fp8,0,0.17511999607086182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,64,0,1,float16,fp8,0,0.1918613314628601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,64,0,1,fp8,fp8,0,0.17549333969751993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,64,128,1,float16,float16,0,0.19153066476186117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,64,0,1,float16,float16,0,0.1925493280092875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,64,128,1,float16,fp8,0,0.19271999597549438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,64,128,1,fp8,fp8,0,0.17493865887324014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,64,0,1,float16,fp8,0,0.19151999553044638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,64,128,1,float16,float16,0,0.10322667161623637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,0,0.10150933265686035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,64,0,1,fp8,fp8,0,0.17520000537236533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,64,128,1,float16,fp8,0,0.10317867000897725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,64,128,1,fp8,fp8,0,0.09532266855239868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,0,0.10300266742706299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,64,0,1,fp8,fp8,0,0.09487467010815938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,64,128,1,float16,float16,0,0.10331199566523235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,64,0,1,float16,float16,0,0.10162132978439331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,64,128,1,float16,fp8,0,0.10115733742713928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,64,128,1,fp8,fp8,0,0.09306666254997253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,64,0,1,float16,fp8,0,0.10358933607737224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,64,0,1,fp8,fp8,0,0.09315199653307597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,64,128,1,float16,float16,0,0.10152533650398254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,64,0,1,float16,float16,0,0.10335466265678406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,64,128,1,float16,fp8,0,0.10142399867375691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,64,128,1,fp8,fp8,0,0.09501333038012187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,64,0,1,float16,fp8,0,0.1034453312555949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,64,0,1,fp8,fp8,0,0.0946613351504008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,64,128,1,float16,float16,0,0.10175999999046326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,64,0,1,float16,float16,0,0.10128000378608704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,64,128,1,float16,fp8,0,0.1032960017522176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,64,128,1,fp8,fp8,0,0.09502399961153667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,64,0,1,float16,fp8,0,0.10295466581980388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,64,128,1,float16,float16,0,0.058277333776156105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,64,0,1,fp8,fp8,0,0.09297600388526917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,64,128,1,float16,fp8,0,0.05826666454474131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,64,128,1,fp8,fp8,0,0.05410666763782501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,0,0.05795733133951823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,0,0.05795200169086456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,64,0,1,fp8,fp8,0,0.05390933156013489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,64,128,1,float16,float16,0,0.05824000140031179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,64,0,1,float16,float16,0,0.05820266902446747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,64,128,1,float16,fp8,0,0.05795733133951823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,64,128,1,fp8,fp8,0,0.054325332244237266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,64,0,1,float16,fp8,0,0.058245331048965454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,64,0,1,fp8,fp8,0,0.05398933092753092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,64,128,1,float16,float16,0,0.05824000140031179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,64,0,1,float16,float16,0,0.058261334896087646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,64,128,1,float16,fp8,0,0.058143998185793556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,64,128,1,fp8,fp8,0,0.05398400127887726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,64,0,1,float16,fp8,0,0.058245331048965454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,64,0,1,fp8,fp8,0,0.05388266841570536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,64,128,1,float16,float16,0,0.05807466804981232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,64,0,1,float16,float16,0,0.058176000912984215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,64,128,1,float16,fp8,0,0.05938133100668589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,64,128,1,fp8,fp8,0,0.05407466491063436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,64,0,1,float16,fp8,0,0.05835199852784475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,64,0,1,fp8,fp8,0,0.054101333022117615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,64,128,1,float16,float16,0,0.037658666570981346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,64,128,1,float16,fp8,0,0.037274666130542755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,0,0.037445334096749626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,64,128,1,fp8,fp8,0,0.0347626656293869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,0,0.037317333122094475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,64,0,1,fp8,fp8,0,0.03534399966398875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,64,0,1,float16,float16,0,0.0359199990828832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,64,128,1,float16,fp8,0,0.03730666637420654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,64,128,1,float16,float16,0,0.03789333254098892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,64,128,1,fp8,fp8,0,0.035573333501815796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,64,0,1,float16,fp8,0,0.037392000357309975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,64,0,1,fp8,fp8,0,0.03538133452335993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,64,128,1,float16,float16,0,0.03824000060558319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,64,0,1,float16,float16,0,0.037765334049860634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,64,128,1,float16,fp8,0,0.03740799923737844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,64,128,1,fp8,fp8,0,0.0354720006386439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,64,0,1,float16,fp8,0,0.03783999880154928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,64,0,1,fp8,fp8,0,0.0356480007370313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,64,128,1,float16,float16,0,0.03745066622893015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,64,0,1,float16,float16,0,0.03799466788768768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,64,128,1,float16,fp8,0,0.03732266773780187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,64,128,1,fp8,fp8,0,0.03565333286921183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,64,0,1,float16,fp8,0,0.037952000896135964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,64,0,1,fp8,fp8,0,0.035642666121323906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,64,128,1,float16,float16,0,0.027215999861558277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,0,0.025557334224383037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,64,128,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,64,128,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,64,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,0,0.02722666660944621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,64,128,1,float16,float16,0,0.02571200082699458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,64,0,1,float16,float16,0,0.027210667729377747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,64,128,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,64,128,1,fp8,fp8,0,0.02518933266401291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,64,0,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,64,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,64,128,1,float16,float16,0,0.027429332335789997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,64,0,1,float16,float16,0,0.02537599951028824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,64,128,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,64,128,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,64,0,1,float16,fp8,0,0.025637333591779072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,64,0,1,fp8,fp8,0,0.02536533276240031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,64,128,1,float16,float16,0,0.025455998877684276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,64,0,1,float16,float16,0,0.025402667621771496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,64,128,1,float16,fp8,0,0.02717866748571396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,64,128,1,fp8,fp8,0,0.025663999219735462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,64,0,1,float16,fp8,0,0.02566933383544286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,64,0,1,fp8,fp8,0,0.025477332373460133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,64,128,1,float16,float16,0,0.021274665991465252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,0,0.021269333859284718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,64,128,1,float16,fp8,0,0.02290133386850357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,64,128,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,64,0,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,64,128,1,float16,float16,0,0.021546666820844013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,64,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,64,128,1,float16,fp8,0,0.021381333470344543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,64,128,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,64,0,1,float16,fp8,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,64,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,64,128,1,float16,float16,0,0.021482666333516438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,64,0,1,float16,float16,0,0.020314666132132213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,64,128,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,64,128,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,64,0,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,64,0,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,64,128,1,float16,float16,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,64,0,1,float16,float16,0,0.021018666525681812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,64,128,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,64,128,1,fp8,fp8,0,0.02126399924357732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,64,0,1,float16,fp8,0,0.02088533341884613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,64,0,1,fp8,fp8,0,0.020997333029905956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,64,128,1,float16,float16,0,0.017845333864291508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,0,0.019215999792019527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,64,128,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,64,128,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,64,128,1,float16,float16,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,64,0,1,float16,float16,0,0.017632000148296356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,64,128,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,64,128,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,64,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,64,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,64,128,1,float16,float16,0,0.01759999990463257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,64,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,64,128,1,float16,fp8,0,0.01987733319401741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,64,128,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,64,0,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,64,0,1,fp8,fp8,0,0.01947733387351036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,64,128,1,float16,float16,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,64,0,1,float16,float16,0,0.019002666076024372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,64,128,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,64,128,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,64,0,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,64,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,64,128,1,float16,float16,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,64,128,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,64,128,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,64,0,1,fp8,fp8,0,0.015850666910409927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,64,128,1,float16,float16,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,64,0,1,float16,fp8,0,0.016544000556071598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,64,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,64,128,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,64,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,64,128,1,float16,float16,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,64,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,64,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,64,128,1,fp8,fp8,0,0.015370666980743408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,64,0,1,fp8,fp8,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,0,0.015717333803574245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,64,128,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,64,128,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,64,0,1,float16,float16,0,0.015813333292802174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,64,128,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,64,0,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,64,128,1,float16,float16,0,0.015285332997639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,64,0,1,fp8,fp8,0,0.015749332805474598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,64,128,1,fp8,fp8,0,0.01568000018596649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,64,0,1,fp8,fp8,0,0.015647999942302704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,64,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,64,128,1,float16,float16,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,64,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,64,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,64,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,128,1,float16,float16,0,1.738976001739502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,128,1,float16,fp8,0,1.7535893122355144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,128,1,fp8,fp8,0,1.6249972979227703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,128,1,float16,float16,0,1.7642399470011394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,0,1,float16,float16,0,10.553738911946615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,128,1,float16,fp8,0,1.777546723683675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,0,1,float16,fp8,0,10.903007507324219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,0,1,fp8,fp8,0,9.821541468302408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,128,1,fp8,fp8,0,1.6523680686950684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,128,1,float16,float16,0,1.7777759234110515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,0,1,float16,float16,0,10.853380839029947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,128,1,float16,fp8,0,1.7926026980082195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,128,1,fp8,fp8,0,1.675370693206787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,0,1,fp8,fp8,0,9.847578684488932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,0,1,float16,fp8,0,12.349013010660807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,128,1,float16,float16,0,1.8117599487304688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,0,1,float16,float16,0,10.62062962849935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,128,1,float16,fp8,0,1.8288480440775554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,128,1,fp8,fp8,0,1.7159892717997234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,0,1,fp8,fp8,0,9.869306564331055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,0,1,float16,fp8,0,10.929322560628256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,128,1,float16,float16,0,1.0346240202585857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,0,1,float16,float16,0,11.795541127522787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,128,1,float16,fp8,0,1.0594773292541504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,0,1,float16,float16,0,5.607834498087565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,128,1,fp8,fp8,0,1.003871997197469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,0,1,fp8,fp8,0,9.91208521525065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,0,1,float16,fp8,0,10.701211293538412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,128,1,float16,float16,0,0.9150079886118571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,128,1,float16,fp8,0,0.9224426746368408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,128,1,fp8,fp8,0,0.8558773199717203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,0,1,float16,fp8,0,5.823770523071289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,0,1,fp8,fp8,0,5.152341206868489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,0,1,float16,float16,0,5.369775772094727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,128,1,float16,float16,0,0.9189546902974447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,128,1,float16,fp8,0,0.9265973567962646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,128,1,fp8,fp8,0,0.8633226553599039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,0,1,float16,fp8,0,5.769253412882487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,0,1,fp8,fp8,0,5.002079963684082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,128,1,float16,float16,0,0.9266933600107828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,0,1,float16,float16,0,5.825685501098633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,128,1,float16,fp8,0,0.984229326248169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,128,1,fp8,fp8,0,0.8729600111643473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,0,1,float16,fp8,0,5.606032053629558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,0,1,fp8,fp8,0,5.009200096130371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,128,1,float16,float16,0,0.9410080115000407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,0,1,float16,float16,0,5.4390614827473955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,128,1,float16,fp8,0,0.9524106979370117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,128,1,fp8,fp8,0,0.8916213512420654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,0,1,fp8,fp8,0,5.018666585286458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,0,1,float16,fp8,0,5.398464202880859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,128,1,float16,float16,0,0.5732053518295288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,0,1,float16,float16,0,5.6183522542317705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,128,1,float16,fp8,0,0.5870453516642252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,0,1,float16,float16,0,2.8697760899861655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,128,1,fp8,fp8,0,0.5595359802246094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,0,1,fp8,fp8,0,5.039152145385742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,0,1,float16,fp8,0,5.531914393107097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,128,1,float16,float16,0,0.5150986512502035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,128,1,float16,fp8,0,0.5168533325195312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,0,1,fp8,fp8,0,2.6824318567911782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,0,1,float16,fp8,0,2.8822507858276367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,128,1,fp8,fp8,0,0.4863733450571696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,0,1,float16,float16,0,2.7947893142700195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,128,1,float16,float16,0,0.5158986647923788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,128,1,float16,fp8,0,0.5201706488927206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,0,1,fp8,fp8,0,2.608821392059326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,0,1,float16,fp8,0,2.976634661356608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,128,1,fp8,fp8,0,0.4903999964396159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,128,1,float16,float16,0,0.5202293395996094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,0,1,float16,float16,0,2.8714399337768555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,0,1,fp8,fp8,0,2.613039970397949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,0,1,float16,fp8,0,2.8028265635172525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,128,1,float16,fp8,0,0.5259093443552653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,128,1,fp8,fp8,0,0.4954613447189331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,0,1,float16,float16,0,2.803999900817871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,128,1,float16,float16,0,0.5274666547775269
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,0,1,fp8,fp8,0,2.6181227366129556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,0,1,float16,fp8,0,2.8261868158976235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,128,1,float16,fp8,0,0.5339306592941284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,128,1,fp8,fp8,0,0.5236800114313761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,0,1,float16,float16,0,2.8132054011027017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,128,1,float16,float16,0,0.4040373166402181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,128,1,float16,fp8,0,0.40407999356587726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,0,1,fp8,fp8,0,2.625802675882975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,0,1,float16,float16,0,1.602725346883138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,0,1,float16,fp8,0,2.821983973185221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,128,1,fp8,fp8,0,0.3840746482213338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,128,1,float16,float16,0,0.40250666936238605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,128,1,float16,fp8,0,0.402074654897054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,0,1,float16,fp8,0,1.6033813158671062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,0,1,fp8,fp8,0,1.491696039835612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,0,1,float16,float16,0,1.5938132603963215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,128,1,fp8,fp8,0,0.3814773162206014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,128,1,float16,float16,0,0.4036373297373454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,128,1,float16,fp8,0,0.40301867326100665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,0,1,fp8,fp8,0,1.4883839289347331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,0,1,float16,fp8,0,1.6187146504720051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,0,1,float16,float16,0,1.597040017445882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,128,1,fp8,fp8,0,0.38176000118255615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,128,1,float16,float16,0,0.4026240110397339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,0,1,float16,fp8,0,1.5966347058614094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,0,1,fp8,fp8,0,1.491744041442871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,128,1,float16,fp8,0,0.4023253520329793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,0,1,float16,float16,0,1.595354715983073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,128,1,fp8,fp8,0,0.3818133274714152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,128,1,float16,float16,0,0.4030933380126953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,0,1,fp8,fp8,0,1.489258607228597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,0,1,float16,fp8,0,1.5972639719645183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,128,1,float16,fp8,0,0.40436800320943195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,128,1,fp8,fp8,0,0.3819520076115926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,0,1,float16,float16,0,1.597856044769287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,0,1,float16,fp8,0,1.6255466143290203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,0,1,fp8,fp8,0,1.4914612770080566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,128,1,float16,float16,0,1.301088015238444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,128,1,float16,fp8,0,1.3110133012135823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,128,1,fp8,fp8,0,1.2110986709594727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,128,1,float16,float16,0,1.3083573182423909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,0,1,float16,float16,0,6.636453628540039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,0,1,float16,fp8,0,6.240293502807617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,128,1,float16,fp8,0,1.319109360376994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,0,1,fp8,fp8,0,5.7995255788167315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,128,1,fp8,fp8,0,1.2264053026835124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,128,1,float16,float16,0,1.3197866280873616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,0,1,float16,float16,0,6.269482930501302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,128,1,float16,fp8,0,1.3312266667683919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,128,1,fp8,fp8,0,1.2416426340738933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,0,1,fp8,fp8,0,5.813989639282227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,0,1,float16,fp8,0,6.3685652414957685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,128,1,float16,float16,0,1.341466744740804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,0,1,float16,float16,0,6.257424036661784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,128,1,float16,fp8,0,1.3564747174580891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,0,1,fp8,fp8,0,5.8292802174886065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,128,1,fp8,fp8,0,1.2717013359069824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,0,1,float16,fp8,0,6.30410639444987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,128,1,float16,float16,0,0.779584010442098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,0,1,float16,float16,0,6.922229131062825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,128,1,float16,fp8,0,0.7974560260772705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,0,1,float16,float16,0,3.381082534790039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,128,1,fp8,fp8,0,0.756879965464274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,0,1,fp8,fp8,0,5.860421498616536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,0,1,float16,fp8,0,6.66001574198405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,128,1,float16,float16,0,0.6908266544342041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,128,1,float16,fp8,0,0.6968692938486735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,0,1,float16,fp8,0,3.3217973709106445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,0,1,fp8,fp8,0,3.0878880818684897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,128,1,fp8,fp8,0,0.6480853160222372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,0,1,float16,float16,0,3.355423927307129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,128,1,float16,float16,0,0.6932426293691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,128,1,float16,fp8,0,0.7005493640899658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,0,1,fp8,fp8,0,2.9766880671183267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,0,1,float16,fp8,0,3.2633387247721353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,128,1,fp8,fp8,0,0.6538559993108114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,128,1,float16,float16,0,0.7003839810689291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,0,1,float16,float16,0,3.439296086629232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,128,1,float16,fp8,0,0.7056746482849121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,0,1,fp8,fp8,0,2.9827680587768555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,0,1,float16,fp8,0,3.374709447224935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,128,1,fp8,fp8,0,0.6593439976374308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,0,1,float16,float16,0,3.2086400985717773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,128,1,float16,float16,0,0.710261344909668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,128,1,float16,fp8,0,0.7191893259684244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,128,1,fp8,fp8,0,0.6744106610616049
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,0,1,fp8,fp8,0,2.989210764567057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,0,1,float16,fp8,0,3.270928064982096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,128,1,float16,float16,0,0.4344853162765503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,0,1,float16,float16,0,3.3084961573282876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,128,1,float16,fp8,0,0.4452586571375529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,128,1,fp8,fp8,0,0.42685866355895996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,0,1,float16,float16,0,1.7350400288899739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,0,1,fp8,fp8,0,3.0026772816975913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,0,1,float16,fp8,0,3.280442555745443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,128,1,float16,float16,0,0.4013386567433675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,0,1,float16,fp8,0,1.7478826840718586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,0,1,fp8,fp8,0,1.6282560030619304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,128,1,float16,fp8,0,0.3951359987258911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,128,1,fp8,fp8,0,0.3720693190892537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,0,1,float16,float16,0,1.6802026430765789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,128,1,float16,float16,0,0.39405866463979083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,0,1,float16,fp8,0,1.68505064646403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,0,1,fp8,fp8,0,1.572005271911621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,128,1,float16,fp8,0,0.39671464761098224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,128,1,fp8,fp8,0,0.3740319808324178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,0,1,float16,float16,0,1.683733304341634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,128,1,float16,float16,0,0.3966133197148641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,0,1,fp8,fp8,0,1.5750932693481445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,0,1,float16,fp8,0,1.6874826749165852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,128,1,fp8,fp8,0,0.37857600053151447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,128,1,float16,fp8,0,0.4012053410212199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,0,1,float16,float16,0,1.6908373832702637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,128,1,float16,float16,0,0.40296534697214764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,128,1,float16,fp8,0,0.40754131476084393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,0,1,float16,fp8,0,1.692837397257487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,0,1,fp8,fp8,0,1.5790613492329915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,128,1,fp8,fp8,0,0.3837759892145793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,0,1,float16,float16,0,1.7468746503194172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,128,1,float16,float16,0,0.30793599287668866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,128,1,float16,fp8,0,0.3081760009129842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,0,1,fp8,fp8,0,1.5856587092081706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,0,1,float16,fp8,0,1.7013707160949707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,0,1,float16,float16,0,0.9985653559366862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,128,1,fp8,fp8,0,0.2918879985809326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,128,1,float16,float16,0,0.3083359996477763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,0,1,float16,fp8,0,1.0002933343251545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,0,1,fp8,fp8,0,0.9305280049641927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,128,1,float16,fp8,0,0.3081973393758138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,0,1,float16,float16,0,0.9906453291575114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,128,1,fp8,fp8,0,0.2919573386510213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,128,1,float16,float16,0,0.3054453333218892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,0,1,fp8,fp8,0,0.9283573627471924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,0,1,float16,fp8,0,0.9914560317993164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,128,1,float16,fp8,0,0.30590399106343585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,128,1,fp8,fp8,0,0.29283734162648517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,0,1,float16,float16,0,0.9936426480611166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,128,1,float16,float16,0,0.30822932720184326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,0,1,fp8,fp8,0,0.9276479880015055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,0,1,float16,fp8,0,0.9921813011169434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,128,1,float16,fp8,0,0.3078666726748149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,128,1,fp8,fp8,0,0.2917813261349996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,0,1,float16,float16,0,0.9958613713582357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,128,1,float16,float16,0,0.3060320019721985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,0,1,float16,fp8,0,0.9935680230458578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,0,1,fp8,fp8,0,0.9267893632253011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,128,1,float16,fp8,0,0.30754133065541583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,0,1,float16,float16,0,0.9982133706410726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,128,1,fp8,fp8,0,0.29375465710957843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,0,1,float16,fp8,0,0.9939200083414713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,0,1,fp8,fp8,0,0.9269920190175375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,128,1,float16,float16,0,1.0838773250579834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,128,1,fp8,fp8,0,1.0098026593526204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,128,1,float16,fp8,0,1.0931359926859539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,128,1,float16,float16,0,1.0887839794158936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,0,1,float16,float16,0,4.598613421122233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,0,1,float16,fp8,0,4.500058809916179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,128,1,float16,fp8,0,1.1000800132751465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,0,1,fp8,fp8,0,4.178741455078125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,128,1,fp8,fp8,0,1.0194453398386638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,0,1,float16,float16,0,4.700026512145996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,128,1,float16,float16,0,1.0992159843444824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,128,1,float16,fp8,0,1.1105546951293945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,0,1,fp8,fp8,0,4.189098676045735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,0,1,float16,fp8,0,4.598501205444336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,128,1,fp8,fp8,0,1.0323839982350667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,128,1,float16,float16,0,1.118127981821696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,0,1,float16,float16,0,4.673343976338704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,128,1,float16,fp8,0,1.1540746688842773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,0,1,fp8,fp8,0,4.201546669006348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,0,1,float16,fp8,0,4.674389203389485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,128,1,fp8,fp8,0,1.0553759733835857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,128,1,float16,float16,0,0.6500000158945719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,0,1,float16,float16,0,4.5394026438395185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,128,1,float16,fp8,0,0.6657706499099731
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,128,1,fp8,fp8,0,0.6317866643269857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,0,1,float16,float16,0,2.402554670969645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,0,1,float16,fp8,0,4.620682716369629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,0,1,fp8,fp8,0,4.224960009256999
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,128,1,float16,float16,0,0.5787413517634074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,128,1,float16,fp8,0,0.5810293356577555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,0,1,float16,fp8,0,2.4199040730794272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,0,1,fp8,fp8,0,2.248784065246582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,128,1,fp8,fp8,0,0.5420053402582804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,0,1,float16,float16,0,2.344821294148763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,128,1,float16,float16,0,0.5794879992802938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,128,1,float16,fp8,0,0.5840533177057902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,0,1,float16,fp8,0,2.31986665725708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,0,1,fp8,fp8,0,2.158474604288737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,128,1,fp8,fp8,0,0.5476799805959066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,0,1,float16,float16,0,2.399797280629476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,128,1,float16,float16,0,0.5845866600672404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,128,1,float16,fp8,0,0.5906560023625692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,0,1,fp8,fp8,0,2.162831942240397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,0,1,float16,fp8,0,2.323520024617513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,128,1,fp8,fp8,0,0.5534559885660807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,0,1,float16,float16,0,2.354314645131429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,128,1,float16,float16,0,0.5934133529663086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,0,1,float16,fp8,0,2.330005327860514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,0,1,fp8,fp8,0,2.1671999295552573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,128,1,float16,fp8,0,0.6009920040766398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,128,1,fp8,fp8,0,0.5639573335647583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,0,1,float16,float16,0,2.338047981262207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,128,1,float16,float16,0,0.36394667625427246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,0,1,float16,fp8,0,2.344106674194336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,0,1,fp8,fp8,0,2.1792747179667153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,0,1,float16,float16,0,1.273253361384074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,128,1,float16,fp8,0,0.3736639817555745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,128,1,fp8,fp8,0,0.35628799597422284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,128,1,float16,float16,0,0.325658659140269
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,0,1,float16,fp8,0,1.2824160257975261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,0,1,fp8,fp8,0,1.1967679659525554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,128,1,float16,fp8,0,0.3280426661173503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,128,1,fp8,fp8,0,0.3102186719576518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,0,1,float16,float16,0,1.2273333072662354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,128,1,float16,float16,0,0.32731199264526367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,0,1,float16,fp8,0,1.2312853336334229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,0,1,fp8,fp8,0,1.1493173440297444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,128,1,float16,fp8,0,0.33023999134699505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,0,1,float16,float16,0,1.2288213570912678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,128,1,fp8,fp8,0,0.31329067548116046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,128,1,float16,float16,0,0.33085334300994873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,0,1,fp8,fp8,0,1.1515680154164631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,0,1,float16,fp8,0,1.2316693464914958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,128,1,float16,fp8,0,0.33450134595235187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,0,1,float16,float16,0,1.2359413305918376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,128,1,fp8,fp8,0,0.31567466259002686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,128,1,float16,float16,0,0.33560001850128174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,0,1,float16,fp8,0,1.237829367319743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,0,1,fp8,fp8,0,1.1557013193766277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,128,1,float16,fp8,0,0.33930667241414386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,0,1,float16,float16,0,1.240015983581543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,128,1,fp8,fp8,0,0.3228800098101298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,128,1,float16,float16,0,0.25889066855112713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,0,1,float16,float16,0,0.7487839857737223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,128,1,float16,fp8,0,0.2611733277638753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,0,1,fp8,fp8,0,1.1607786814371746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,0,1,float16,fp8,0,1.2438666820526123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,128,1,fp8,fp8,0,0.24847465753555298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,128,1,float16,float16,0,0.2555466691652934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,0,1,float16,fp8,0,0.749013344446818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,0,1,fp8,fp8,0,0.6965813636779785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,128,1,float16,fp8,0,0.25659199555714923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,128,1,fp8,fp8,0,0.24457067251205444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,0,1,float16,float16,0,0.7411626974741617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,128,1,float16,float16,0,0.2569440007209778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,0,1,float16,fp8,0,0.7439253330230713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,0,1,fp8,fp8,0,0.6929972966512045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,0,1,float16,float16,0,0.7408053080240885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,128,1,float16,fp8,0,0.25570134321848553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,128,1,fp8,fp8,0,0.24488532543182373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,128,1,float16,float16,0,0.2555626630783081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,0,1,float16,fp8,0,0.7406026522318522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,0,1,fp8,fp8,0,0.6948320070902506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,0,1,float16,float16,0,0.7435359954833984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,128,1,float16,fp8,0,0.2553013364473979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,128,1,fp8,fp8,0,0.24486400683720908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,128,1,float16,float16,0,0.25684799750645954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,0,1,float16,fp8,0,0.7417066891988119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,0,1,fp8,fp8,0,0.6959679921468099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,128,1,float16,fp8,0,0.2577600081761678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,0,1,float16,float16,0,0.7427146434783936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,128,1,fp8,fp8,0,0.24702399969100952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,0,1,float16,fp8,0,0.7459626992543539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,0,1,fp8,fp8,0,0.6951839923858643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,128,1,float16,float16,0,1.691381295522054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,128,1,fp8,fp8,0,1.575658639272054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,128,1,float16,fp8,0,1.7055892944335938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,128,1,float16,float16,0,1.7153654098510742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,0,1,float16,float16,0,6.1237335205078125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,0,1,fp8,fp8,0,5.5468800862630205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,0,1,float16,fp8,0,5.977455774943034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,128,1,float16,fp8,0,1.7282293637593586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,128,1,fp8,fp8,0,1.6042772928873699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,0,1,float16,float16,0,6.00108273824056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,128,1,float16,float16,0,1.7315093676249187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,128,1,float16,fp8,0,1.7456960678100586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,0,1,fp8,fp8,0,5.577413558959961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,0,1,float16,fp8,0,6.058261235555013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,128,1,fp8,fp8,0,1.6257972717285156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,0,1,float16,float16,0,6.016127904256185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,128,1,float16,float16,0,1.7644799550374348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,128,1,float16,fp8,0,1.77948792775472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,0,1,fp8,fp8,0,5.599744160970052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,0,1,float16,fp8,0,6.367471694946289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,128,1,fp8,fp8,0,1.6653332710266113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,128,1,float16,float16,0,0.9907039801279703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,0,1,float16,float16,0,6.0617014567057295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,128,1,float16,fp8,0,1.0116480191548665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,128,1,fp8,fp8,0,0.9573760032653809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,0,1,float16,fp8,0,6.2445494333903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,0,1,float16,float16,0,3.35152530670166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,0,1,fp8,fp8,0,5.639455795288086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,128,1,float16,float16,0,0.8685812950134277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,0,1,float16,fp8,0,3.199930508931478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,0,1,fp8,fp8,0,2.9696054458618164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,128,1,float16,fp8,0,0.8764906724294027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,128,1,fp8,fp8,0,0.8101440270741781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,0,1,float16,float16,0,3.103935877482096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,128,1,float16,float16,0,0.8723680178324381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,0,1,float16,fp8,0,3.0410292943318686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,0,1,fp8,fp8,0,2.821573257446289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,128,1,float16,fp8,0,0.8798133532206217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,128,1,fp8,fp8,0,0.8174986839294434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,0,1,float16,float16,0,3.1579678853352866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,128,1,float16,float16,0,0.8792746861775717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,128,1,float16,fp8,0,0.8874293168385824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,0,1,float16,fp8,0,3.0556907653808594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,0,1,fp8,fp8,0,2.8290878931681314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,128,1,fp8,fp8,0,0.8257066408793131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,0,1,float16,float16,0,3.0485973358154297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,128,1,float16,float16,0,0.8934293588002523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,128,1,float16,fp8,0,0.9052960077921549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,0,1,fp8,fp8,0,2.8385066986083984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,0,1,float16,fp8,0,3.1220267613728843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,128,1,fp8,fp8,0,0.8449760278066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,0,1,float16,float16,0,3.1372480392456055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,128,1,float16,float16,0,0.5235040187835693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,128,1,float16,fp8,0,0.5357439915339152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,0,1,fp8,fp8,0,2.8552640279134116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,0,1,float16,fp8,0,3.0808693567911782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,128,1,fp8,fp8,0,0.5085440079371134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,0,1,float16,float16,0,1.690351963043213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,128,1,float16,float16,0,0.4620373249053955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,128,1,float16,fp8,0,0.4654239813486735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,0,1,float16,fp8,0,1.653765360514323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,0,1,fp8,fp8,0,1.5399680137634277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,0,1,float16,float16,0,1.5715786616007488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,128,1,fp8,fp8,0,0.4370986620585124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,128,1,float16,float16,0,0.465887983640035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,128,1,float16,fp8,0,0.470304012298584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,0,1,fp8,fp8,0,1.4677707354227703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,0,1,float16,fp8,0,1.5746986071268718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,0,1,float16,float16,0,1.576325257619222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,128,1,fp8,fp8,0,0.4408959945042928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,128,1,float16,float16,0,0.4696799914042155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,0,1,fp8,fp8,0,1.4715627034505208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,0,1,float16,fp8,0,1.5805120468139648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,128,1,float16,fp8,0,0.47365331649780273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,0,1,float16,float16,0,1.5826346079508464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,128,1,fp8,fp8,0,0.44575464725494385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,128,1,float16,float16,0,0.47816534837086994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,0,1,fp8,fp8,0,1.4757173856099446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,0,1,float16,fp8,0,1.586714744567871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,128,1,float16,fp8,0,0.4840373198191325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,0,1,float16,float16,0,1.5910186767578125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,128,1,fp8,fp8,0,0.4546080032984416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,128,1,float16,float16,0,0.2985386649767558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,128,1,float16,fp8,0,0.3040906588236491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,0,1,float16,float16,0,0.8802719910939535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,0,1,float16,fp8,0,1.5994346936543782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,0,1,fp8,fp8,0,1.484890619913737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,128,1,fp8,fp8,0,0.28967465957005817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,128,1,float16,float16,0,0.2614346742630005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,0,1,fp8,fp8,0,0.8302453358968099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,0,1,float16,fp8,0,0.9142186641693115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,128,1,float16,fp8,0,0.26419732968012494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,0,1,float16,float16,0,0.8413546880086263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,128,1,fp8,fp8,0,0.2528640031814575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,128,1,float16,float16,0,0.26472532749176025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,0,1,float16,fp8,0,0.8440907001495361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,0,1,fp8,fp8,0,0.7911252975463867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,128,1,float16,fp8,0,0.2667466600735982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,0,1,float16,float16,0,0.8458773295084635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,128,1,fp8,fp8,0,0.25464000304539997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,128,1,float16,float16,0,0.26740266879399616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,0,1,float16,fp8,0,0.8463253180185953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,0,1,fp8,fp8,0,0.7942240238189697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,128,1,float16,fp8,0,0.2703733245531718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,0,1,float16,float16,0,0.8482933044433594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,128,1,fp8,fp8,0,0.25758934020996094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,128,1,float16,float16,0,0.27267734209696454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,0,1,float16,fp8,0,0.8514506816864014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,0,1,fp8,fp8,0,0.7962559858957926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,128,1,float16,fp8,0,0.27525333563486737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,0,1,float16,float16,0,0.8557600180308024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,128,1,fp8,fp8,0,0.2630293369293213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,128,1,float16,float16,0,0.20975999037424722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,128,1,float16,fp8,0,0.21018133560816446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,0,1,float16,fp8,0,0.8594293594360352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,0,1,fp8,fp8,0,0.8018133640289307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,0,1,float16,float16,0,0.5306986570358276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,128,1,fp8,fp8,0,0.19961599508921304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,128,1,float16,float16,0,0.2074293295542399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,0,1,fp8,fp8,0,0.49458666642506915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,0,1,float16,fp8,0,0.5334506829579672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,128,1,float16,fp8,0,0.2076746622721354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,128,1,fp8,fp8,0,0.19730132818222046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,0,1,float16,float16,0,0.5267839829126993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,0,1,float16,fp8,0,0.5279839833577474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,128,1,float16,float16,0,0.20577067136764526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,0,1,fp8,fp8,0,0.49237334728240967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,0,1,float16,float16,0,0.5266986687978109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,128,1,float16,fp8,0,0.20558400948842367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,128,1,fp8,fp8,0,0.1967946688334147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,128,1,float16,float16,0,0.2058239976565043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,0,1,float16,fp8,0,0.5246400038401285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,0,1,fp8,fp8,0,0.49084266026814777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,128,1,float16,fp8,0,0.20739734172821045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,128,1,fp8,fp8,0,0.19848533471425375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,0,1,float16,float16,0,0.5277119874954224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,0,1,float16,fp8,0,0.5272159973780314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,128,1,float16,float16,0,0.20585066080093384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,0,1,fp8,fp8,0,0.4940426747004191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,128,1,float16,fp8,0,0.2069279948870341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,128,1,fp8,fp8,0,0.19925866524378458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,0,1,float16,float16,0,0.527018666267395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,0,1,float16,fp8,0,0.5276799996693929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,0,1,fp8,fp8,0,0.4944213231404622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,128,1,float16,float16,0,1.2622826894124348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,128,1,float16,fp8,0,1.273738702138265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,128,1,fp8,fp8,0,1.1730133692423503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,0,1,float16,float16,0,3.6197067896525064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,128,1,float16,float16,0,1.2724693616231282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,0,1,float16,fp8,0,3.632229487101237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,0,1,fp8,fp8,0,3.360645294189453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,128,1,float16,fp8,0,1.2844479878743489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,128,1,fp8,fp8,0,1.1898346741994221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,0,1,float16,float16,0,3.6951465606689453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,128,1,float16,float16,0,1.2826613585154216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,0,1,float16,fp8,0,3.6966028213500977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,128,1,float16,fp8,0,1.2957546710968018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,0,1,fp8,fp8,0,3.3788318634033203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,128,1,fp8,fp8,0,1.2032000223795574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,0,1,float16,float16,0,3.7203038533528647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,128,1,float16,float16,0,1.3083679676055908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,0,1,float16,fp8,0,3.6679999033610025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,0,1,fp8,fp8,0,3.3927253087361655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,128,1,float16,fp8,0,1.321290651957194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,128,1,fp8,fp8,0,1.2354719638824463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,0,1,float16,float16,0,3.7593921025594077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,128,1,float16,float16,0,0.7465866406758627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,128,1,float16,fp8,0,0.7616106669108073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,0,1,float16,fp8,0,3.6913814544677734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,0,1,fp8,fp8,0,3.425466537475586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,128,1,fp8,fp8,0,0.7197759946187338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,0,1,float16,float16,0,1.9589279492696126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,128,1,float16,float16,0,0.6548266808191935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,0,1,float16,fp8,0,2.0701866149902344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,128,1,float16,fp8,0,0.6606826782226562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,0,1,fp8,fp8,0,1.8332640329996746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,0,1,float16,float16,0,1.8600212732950847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,128,1,fp8,fp8,0,0.6118880112965902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,128,1,float16,float16,0,0.6610933144887289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,0,1,fp8,fp8,0,1.7248853047688801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,0,1,float16,fp8,0,1.8587573369344075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,128,1,float16,fp8,0,0.6658560037612915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,128,1,fp8,fp8,0,0.6193813482920328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,0,1,float16,float16,0,1.8564693133036296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,128,1,float16,float16,0,0.6648960113525391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,0,1,fp8,fp8,0,1.7306933403015137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,0,1,float16,fp8,0,1.864672025044759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,128,1,float16,fp8,0,0.6722666422526041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,128,1,fp8,fp8,0,0.624778668085734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,0,1,float16,float16,0,1.865338643391927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,128,1,float16,float16,0,0.6753653685251871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,0,1,float16,fp8,0,1.8723626136779785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,0,1,fp8,fp8,0,1.7370986938476562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,128,1,float16,fp8,0,0.6828426520029703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,128,1,fp8,fp8,0,0.6377173264821371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,0,1,float16,float16,0,1.89027738571167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,128,1,float16,float16,0,0.3997226556142171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,0,1,float16,float16,0,1.0234133402506511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,128,1,float16,fp8,0,0.4094613393147786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,0,1,float16,fp8,0,1.8875892957051594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,0,1,fp8,fp8,0,1.7511839866638184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,128,1,fp8,fp8,0,0.38731733957926434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,128,1,float16,float16,0,0.3526826699574788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,0,1,float16,fp8,0,1.0346613725026448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,0,1,fp8,fp8,0,0.9615146319071451
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,128,1,float16,fp8,0,0.35495467980702716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,128,1,fp8,fp8,0,0.3350613514582316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,0,1,float16,float16,0,0.9686773618062338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,128,1,float16,float16,0,0.3537173271179199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,0,1,float16,fp8,0,0.9718026320139567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,0,1,fp8,fp8,0,0.9071626663208008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,128,1,float16,fp8,0,0.3573919932047526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,0,1,float16,float16,0,0.9723359743754069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,128,1,fp8,fp8,0,0.3376266558965047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,128,1,float16,float16,0,0.3588693141937256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,0,1,float16,fp8,0,0.9749066829681396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,0,1,fp8,fp8,0,0.9104800224304199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,128,1,float16,fp8,0,0.36210131645202637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,0,1,float16,float16,0,0.9779840310414633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,128,1,fp8,fp8,0,0.3404266834259033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,128,1,float16,float16,0,0.36559466520945233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,0,1,fp8,fp8,0,0.9142986933390299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,0,1,float16,fp8,0,0.9815839926401774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,128,1,float16,fp8,0,0.3694719870885213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,0,1,float16,float16,0,0.9871893723805746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,128,1,fp8,fp8,0,0.3488159974416097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,128,1,float16,float16,0,0.22817599773406982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,0,1,float16,fp8,0,0.9886346658070883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,128,1,float16,fp8,0,0.2336853345235189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,0,1,float16,float16,0,0.5597013235092163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,0,1,fp8,fp8,0,0.9203360080718994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,128,1,fp8,fp8,0,0.2241599957148234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,0,1,float16,fp8,0,0.5651093324025472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,128,1,float16,float16,0,0.20042133331298828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,0,1,fp8,fp8,0,0.5272213220596313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,128,1,float16,fp8,0,0.2016800045967102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,128,1,fp8,fp8,0,0.19565333922704062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,0,1,float16,float16,0,0.5284906625747681
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,128,1,float16,float16,0,0.20168532927831015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,0,1,float16,fp8,0,0.5299413204193115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,0,1,fp8,fp8,0,0.49876801172892254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,128,1,fp8,fp8,0,0.19631467262903848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,128,1,float16,fp8,0,0.20354666312535605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,0,1,float16,float16,0,0.5321280161539713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,0,1,fp8,fp8,0,0.4994613329569499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,128,1,float16,float16,0,0.20364266633987427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,0,1,float16,fp8,0,0.5298986832300822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,128,1,float16,fp8,0,0.20578134059906006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,128,1,fp8,fp8,0,0.19817600647608438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,0,1,float16,float16,0,0.5327626864115397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,0,1,fp8,fp8,0,0.5015146732330322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,128,1,float16,float16,0,0.2076639930407206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,0,1,float16,fp8,0,0.5345333417256674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,128,1,float16,fp8,0,0.21124800046284994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,128,1,fp8,fp8,0,0.20254399379094443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,0,1,float16,float16,0,0.5385706822077433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,128,1,float16,float16,0,0.16497600078582764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,0,1,fp8,fp8,0,0.5075039863586426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,0,1,float16,fp8,0,0.5415306488672892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,128,1,float16,fp8,0,0.16331199804941812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,128,1,fp8,fp8,0,0.15651733676592508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,0,1,float16,float16,0,0.3508960008621216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,128,1,float16,float16,0,0.16235733032226562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,0,1,fp8,fp8,0,0.32613333066304523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,0,1,float16,fp8,0,0.3511146704355876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,128,1,float16,fp8,0,0.16169599692026773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,128,1,fp8,fp8,0,0.15456533432006836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,0,1,float16,float16,0,0.3447200059890747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,128,1,float16,float16,0,0.1606773336728414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,0,1,fp8,fp8,0,0.32443734010060626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,0,1,float16,fp8,0,0.34612266222635907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,128,1,float16,fp8,0,0.1604693333307902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,128,1,fp8,fp8,0,0.15219733119010925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,0,1,float16,float16,0,0.34628268082936603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,0,1,float16,fp8,0,0.34603198369344074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,128,1,float16,float16,0,0.16133866707483926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,0,1,fp8,fp8,0,0.3222879966100057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,0,1,float16,float16,0,0.3468159834543864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,128,1,float16,fp8,0,0.16153066356976828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,128,1,fp8,fp8,0,0.1546453336874644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,0,1,float16,fp8,0,0.34727998574574787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,128,1,float16,float16,0,0.16284799575805664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,0,1,fp8,fp8,0,0.3227519989013672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,0,1,float16,float16,0,0.34649066130320233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,128,1,float16,fp8,0,0.16051200032234192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,128,1,fp8,fp8,0,0.1534346640110016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,0,1,float16,fp8,0,0.3481173515319824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,0,1,fp8,fp8,0,0.3211946686108907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,128,1,float16,float16,0,1.6700000762939453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,128,1,float16,fp8,0,1.6821866035461426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,128,1,fp8,fp8,0,1.548986593882243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,0,1,float16,float16,0,3.648319880167643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,128,1,float16,float16,0,1.6890400250752766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,0,1,float16,fp8,0,3.659424146016439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,0,1,fp8,fp8,0,3.385786692301432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,128,1,float16,fp8,0,1.7023359934488933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,128,1,fp8,fp8,0,1.5803093910217285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,0,1,float16,float16,0,3.6713120142618814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,128,1,float16,float16,0,1.7070719401041667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,0,1,float16,fp8,0,3.6839307149251304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,0,1,fp8,fp8,0,3.413402557373047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,128,1,float16,fp8,0,1.7200320561726887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,128,1,fp8,fp8,0,1.5988426208496094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,0,1,float16,float16,0,3.6912053426106772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,128,1,float16,float16,0,1.7433759371439617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,0,1,float16,fp8,0,3.7936798731486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,0,1,fp8,fp8,0,3.4345439275105796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,128,1,float16,fp8,0,1.7574933369954426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,0,1,float16,float16,0,3.7337280909220376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,128,1,fp8,fp8,0,1.6405812899271648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,128,1,float16,float16,0,0.9684960047403971
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,0,1,float16,float16,0,1.9860000610351562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,128,1,float16,fp8,0,0.9846879641215006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,0,1,float16,fp8,0,3.7487147649129233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,128,1,fp8,fp8,0,0.9491360187530518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,0,1,fp8,fp8,0,3.47761599222819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,128,1,float16,float16,0,0.8426026503245035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,0,1,float16,fp8,0,2.00381867090861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,0,1,fp8,fp8,0,1.8619200388590496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,0,1,float16,float16,0,1.8456907272338867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,128,1,float16,fp8,0,0.8505067030588785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,128,1,fp8,fp8,0,0.7857279777526855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,128,1,float16,float16,0,0.8496586481730143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,0,1,fp8,fp8,0,1.7149866422017415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,0,1,float16,fp8,0,1.8527946472167969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,0,1,float16,float16,0,1.8525759379069011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,128,1,float16,fp8,0,0.8573760191599528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,128,1,fp8,fp8,0,0.7944533030192057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,128,1,float16,float16,0,0.8560480276743571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,0,1,fp8,fp8,0,1.7218720118204753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,0,1,float16,fp8,0,1.862229347229004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,128,1,fp8,fp8,0,0.8005812962849935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,128,1,float16,fp8,0,0.8651573657989502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,0,1,float16,float16,0,1.8601279258728027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,128,1,float16,float16,0,0.8710933526357015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,0,1,fp8,fp8,0,1.7307039896647136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,0,1,float16,fp8,0,1.8711466789245605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,128,1,float16,fp8,0,0.8817120393117269
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,128,1,fp8,fp8,0,0.8202826976776123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,0,1,float16,float16,0,1.8786667188008626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,128,1,float16,float16,0,0.5025440057118734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,0,1,fp8,fp8,0,1.749509334564209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,0,1,float16,fp8,0,1.888821283976237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,0,1,float16,float16,0,1.0227680206298828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,128,1,float16,fp8,0,0.5129760106404623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,128,1,fp8,fp8,0,0.4848693211873372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,128,1,float16,float16,0,0.4394986629486084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,0,1,fp8,fp8,0,0.9631413618723551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,0,1,float16,fp8,0,1.0363360246022542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,0,1,float16,float16,0,0.9551093578338623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,128,1,float16,fp8,0,0.4431573152542114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,128,1,fp8,fp8,0,0.41363732020060223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,128,1,float16,float16,0,0.4427786668141683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,0,1,float16,fp8,0,0.9620693524678549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,0,1,fp8,fp8,0,0.8913173675537109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,0,1,float16,float16,0,0.9572106997172037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,128,1,float16,fp8,0,0.4471946557362874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,128,1,fp8,fp8,0,0.419269323348999
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,128,1,float16,float16,0,0.4475253423055013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,0,1,float16,fp8,0,0.9627626736958822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,0,1,fp8,fp8,0,0.8952213128407797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,0,1,float16,float16,0,0.9639573097229004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,128,1,float16,fp8,0,0.4517546494801839
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,128,1,fp8,fp8,0,0.42289066314697266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,128,1,float16,float16,0,0.454090674718221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,0,1,float16,fp8,0,0.9682506720225016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,0,1,fp8,fp8,0,0.8992906411488851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,0,1,float16,float16,0,0.9711893399556478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,128,1,float16,fp8,0,0.4596586624781291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,128,1,fp8,fp8,0,0.431328018506368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,128,1,float16,float16,0,0.2711306611696879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,0,1,float16,fp8,0,0.9797600110371908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,128,1,float16,fp8,0,0.2786826690038045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,0,1,float16,float16,0,0.5444586674372355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,0,1,fp8,fp8,0,0.9067573547363281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,128,1,fp8,fp8,0,0.26554133494695026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,128,1,float16,float16,0,0.23629866043726602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,0,1,fp8,fp8,0,0.5145440101623535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,0,1,float16,fp8,0,0.5514773527781168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,128,1,float16,fp8,0,0.2385173241297404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,128,1,fp8,fp8,0,0.22815465927124023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,0,1,float16,float16,0,0.5065653324127197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,0,1,float16,fp8,0,0.5077279806137085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,128,1,float16,float16,0,0.23742934068044028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,0,1,fp8,fp8,0,0.4768480062484741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,0,1,float16,float16,0,0.5077973206837972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,128,1,float16,fp8,0,0.240064005057017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,128,1,fp8,fp8,0,0.22869332631429037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,128,1,float16,float16,0,0.2409013311068217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,0,1,float16,fp8,0,0.5091520150502523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,0,1,fp8,fp8,0,0.4795733292897542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,0,1,float16,float16,0,0.5110719998677572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,128,1,float16,fp8,0,0.24472532669703165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,128,1,fp8,fp8,0,0.23224000136057535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,128,1,float16,float16,0,0.24743467569351196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,0,1,float16,fp8,0,0.5152000188827515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,0,1,fp8,fp8,0,0.482256015141805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,0,1,float16,float16,0,0.5176213184992472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,128,1,float16,fp8,0,0.25008533398310345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,128,1,fp8,fp8,0,0.23847466707229614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,0,1,float16,fp8,0,0.5201706488927206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,128,1,float16,float16,0,0.15454933047294617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,0,1,fp8,fp8,0,0.48787200450897217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,0,1,float16,float16,0,0.306986669699351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,128,1,float16,fp8,0,0.1584053337574005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,128,1,fp8,fp8,0,0.15471999843915304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,0,1,float16,fp8,0,0.31011732419331867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,0,1,fp8,fp8,0,0.29198400179545086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,128,1,float16,float16,0,0.13686399658521017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,0,1,float16,float16,0,0.2863573431968689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,128,1,float16,fp8,0,0.1361120045185089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,128,1,fp8,fp8,0,0.12872533003489176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,0,1,float16,fp8,0,0.28572799762090045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,128,1,float16,float16,0,0.13615999619166055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,0,1,fp8,fp8,0,0.26689600944519043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,0,1,float16,float16,0,0.2871519923210144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,128,1,float16,fp8,0,0.13714133699735007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,128,1,fp8,fp8,0,0.1297813355922699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,0,1,float16,fp8,0,0.2885439991950989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,128,1,float16,float16,0,0.1381119986375173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,0,1,fp8,fp8,0,0.26637866099675495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,0,1,float16,float16,0,0.2863573431968689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,128,1,float16,fp8,0,0.13684266805648804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,128,1,fp8,fp8,0,0.13190933068593344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,0,1,float16,fp8,0,0.28993066151936847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,128,1,float16,float16,0,0.13834666212399802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,0,1,fp8,fp8,0,0.2690666715304057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,0,1,float16,float16,0,0.2892746726671855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,128,1,float16,fp8,0,0.14127467075983682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,128,1,fp8,fp8,0,0.13818132877349854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,0,1,float16,fp8,0,0.2900480031967163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,128,1,float16,float16,0,0.11346667011578877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,0,1,fp8,fp8,0,0.27673067649205524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,0,1,float16,float16,0,0.2013439933458964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,128,1,float16,fp8,0,0.11349333326021831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,128,1,fp8,fp8,0,0.11069867014884949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,0,1,float16,fp8,0,0.20164799690246582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,0,1,fp8,fp8,0,0.18942399819691977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,128,1,float16,float16,0,0.11188266674677531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,128,1,fp8,fp8,0,0.10723732908566792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,128,1,float16,fp8,0,0.11142399907112122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,0,1,float16,float16,0,0.19901333252588907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,128,1,float16,float16,0,0.11170132954915364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,0,1,fp8,fp8,0,0.18757333358128866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,0,1,float16,fp8,0,0.20092799266179404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,128,1,float16,fp8,0,0.11132267117500305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,128,1,fp8,fp8,0,0.10773866375287373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,0,1,float16,float16,0,0.20002132654190063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,128,1,float16,float16,0,0.11288533608118693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,0,1,fp8,fp8,0,0.18742932875951132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,0,1,float16,fp8,0,0.1993173360824585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,128,1,float16,fp8,0,0.11204800009727478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,128,1,fp8,fp8,0,0.10744532942771912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,0,1,float16,float16,0,0.1996799906094869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,128,1,float16,float16,0,0.1135093371073405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,0,1,float16,fp8,0,0.1997599999109904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,0,1,fp8,fp8,0,0.18813333908716837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,128,1,float16,fp8,0,0.11134933431943257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,128,1,fp8,fp8,0,0.10748266180356343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,0,1,float16,float16,0,0.19960532585779825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,0,1,float16,fp8,0,0.19988266626993814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,0,1,fp8,fp8,0,0.1876586675643921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,128,1,float16,float16,0,1.2449066638946533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,128,1,float16,fp8,0,1.2541546821594238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,0,1,float16,float16,0,2.301466623942057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,128,1,fp8,fp8,0,1.155344009399414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,0,1,float16,fp8,0,2.311061382293701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,128,1,float16,float16,0,1.2553919951121013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,0,1,fp8,fp8,0,2.1347146034240723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,128,1,fp8,fp8,0,1.1727840105692546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,128,1,float16,fp8,0,1.2668533325195312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,0,1,float16,float16,0,2.3125759760538735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,128,1,float16,float16,0,1.2698773543039958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,0,1,float16,fp8,0,2.325285275777181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,0,1,fp8,fp8,0,2.1499786376953125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,0,1,float16,float16,0,2.327610651652018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,128,1,float16,fp8,0,1.2783839702606201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,128,1,fp8,fp8,0,1.1875786781311035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,128,1,float16,float16,0,1.2928799788157146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,0,1,float16,fp8,0,2.3382879892985025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,0,1,fp8,fp8,0,2.1656319300333657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,0,1,float16,float16,0,2.3579840660095215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,128,1,float16,fp8,0,1.3032800356547039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,128,1,fp8,fp8,0,1.2181119918823242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,128,1,float16,float16,0,0.7279733022054037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,0,1,float16,float16,0,1.2746079762776692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,0,1,float16,fp8,0,2.3661600748697915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,128,1,float16,fp8,0,0.743120034535726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,0,1,fp8,fp8,0,2.195583979288737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,128,1,fp8,fp8,0,0.7013759613037109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,128,1,float16,float16,0,0.6354026794433594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,0,1,float16,fp8,0,1.2900853157043457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,0,1,fp8,fp8,0,1.201317310333252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,128,1,float16,fp8,0,0.6422826846440634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,0,1,float16,float16,0,1.1731946468353271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,128,1,fp8,fp8,0,0.5947786569595337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,0,1,float16,fp8,0,1.180351972579956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,128,1,float16,float16,0,0.6409866809844971
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,0,1,fp8,fp8,0,1.0915093421936035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,0,1,float16,float16,0,1.1779306729634602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,128,1,fp8,fp8,0,0.5995359818140665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,128,1,float16,fp8,0,0.6473226547241211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,128,1,float16,float16,0,0.6470453341801962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,0,1,float16,fp8,0,1.1855733394622803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,0,1,fp8,fp8,0,1.0961919625600178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,0,1,float16,float16,0,1.1858293215433757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,128,1,float16,fp8,0,0.6529599825541178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,128,1,fp8,fp8,0,0.6063093344370524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,0,1,float16,fp8,0,1.1924106280008953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,128,1,float16,float16,0,0.6578933397928873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,0,1,fp8,fp8,0,1.1035679976145427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,0,1,float16,float16,0,1.2005013624827068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,128,1,float16,fp8,0,0.6667733192443848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,128,1,fp8,fp8,0,0.6200000047683716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,128,1,float16,float16,0,0.38149865468343097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,0,1,float16,fp8,0,1.2072052955627441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,0,1,float16,float16,0,0.6643786827723185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,0,1,fp8,fp8,0,1.1177813212076824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,128,1,float16,fp8,0,0.38976001739501953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,128,1,fp8,fp8,0,0.3694560130437215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,128,1,float16,float16,0,0.33289599418640137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,0,1,float16,fp8,0,0.672432025273641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,0,1,fp8,fp8,0,0.6278826793034872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,0,1,float16,float16,0,0.6115786631902059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,128,1,fp8,fp8,0,0.3162826697031657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,128,1,float16,fp8,0,0.33685866991678876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,0,1,float16,fp8,0,0.6162933508555094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,128,1,float16,float16,0,0.3365386724472046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,0,1,fp8,fp8,0,0.5740213394165039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,0,1,float16,float16,0,0.6142986615498861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,128,1,float16,fp8,0,0.33880531787872314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,128,1,fp8,fp8,0,0.31831467151641846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,0,1,float16,fp8,0,0.6167093515396118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,128,1,float16,float16,0,0.3394240140914917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,0,1,fp8,fp8,0,0.5766880114873251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,128,1,float16,fp8,0,0.3442453145980835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,128,1,fp8,fp8,0,0.32254934310913086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,0,1,float16,float16,0,0.6180373430252075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,128,1,float16,float16,0,0.3481760025024414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,0,1,fp8,fp8,0,0.5794773499170939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,0,1,float16,fp8,0,0.6229493220647176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,128,1,float16,fp8,0,0.3514560063680013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,128,1,fp8,fp8,0,0.32970666885375977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,0,1,float16,float16,0,0.6250506639480591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,128,1,float16,float16,0,0.20821332931518555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,0,1,fp8,fp8,0,0.586298664410909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,0,1,float16,fp8,0,0.6305333375930786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,0,1,float16,float16,0,0.3572746515274048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,128,1,float16,fp8,0,0.21241066853205362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,128,1,fp8,fp8,0,0.20381333430608115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,128,1,float16,float16,0,0.1795039971669515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,0,1,float16,fp8,0,0.36181867122650146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,0,1,fp8,fp8,0,0.34095466136932373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,128,1,float16,fp8,0,0.18267732858657837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,128,1,fp8,fp8,0,0.1746079921722412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,0,1,float16,float16,0,0.32740267117818195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,128,1,float16,float16,0,0.18002132574717203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,0,1,float16,fp8,0,0.3300960063934326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,0,1,fp8,fp8,0,0.3102399905522664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,128,1,float16,fp8,0,0.1830880045890808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,128,1,fp8,fp8,0,0.17693867286046347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,0,1,float16,float16,0,0.32984532912572223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,128,1,float16,float16,0,0.18341867129007974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,0,1,fp8,fp8,0,0.3129066626230876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,0,1,float16,fp8,0,0.3289066751797994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,128,1,float16,fp8,0,0.18534932533899942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,128,1,fp8,fp8,0,0.1794453263282776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,0,1,float16,float16,0,0.331712007522583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,128,1,float16,float16,0,0.18928533792495728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,0,1,fp8,fp8,0,0.3160426616668701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,0,1,float16,fp8,0,0.3350133498509725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,128,1,float16,fp8,0,0.19135467211405435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,0,1,float16,float16,0,0.3365279833475749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,128,1,fp8,fp8,0,0.18467734257380167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,0,1,float16,fp8,0,0.3393973509470622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,128,1,float16,float16,0,0.1216319998105367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,0,1,fp8,fp8,0,0.3203893303871155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,0,1,float16,float16,0,0.2058186729749044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,128,1,float16,fp8,0,0.12401599685351054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,128,1,fp8,fp8,0,0.12204266587893169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,128,1,float16,float16,0,0.10796266794204712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,0,1,float16,fp8,0,0.20800532897313437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,0,1,fp8,fp8,0,0.19776000579198202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,0,1,float16,float16,0,0.18973867098490396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,128,1,float16,fp8,0,0.10877866546312968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,128,1,fp8,fp8,0,0.10100266337394714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,0,1,float16,fp8,0,0.1901599963506063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,0,1,fp8,fp8,0,0.17729065815607706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,128,1,float16,float16,0,0.10746133327484131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,0,1,float16,float16,0,0.1928373376528422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,128,1,float16,fp8,0,0.10750933488210042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,128,1,fp8,fp8,0,0.10129066308339436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,0,1,float16,fp8,0,0.1916266679763794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,128,1,float16,float16,0,0.10746133327484131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,0,1,fp8,fp8,0,0.17925333976745605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,0,1,float16,float16,0,0.1917333404223124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,128,1,float16,fp8,0,0.1090133289496104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,128,1,fp8,fp8,0,0.10314666231473286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,0,1,float16,fp8,0,0.1930720011393229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,128,1,float16,float16,0,0.1092746655146281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,0,1,fp8,fp8,0,0.17896533012390137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,0,1,float16,float16,0,0.19241066773732504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,128,1,float16,fp8,0,0.11149866382280986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,128,1,fp8,fp8,0,0.10538666447003682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,0,1,float16,fp8,0,0.19289066394170126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,128,1,float16,float16,0,0.08695466319719951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,0,1,fp8,fp8,0,0.18181333939234415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,0,1,float16,float16,0,0.13809600472450256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,128,1,float16,fp8,0,0.0869653324286143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,128,1,fp8,fp8,0,0.08448533217112224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,0,1,float16,fp8,0,0.13843733072280884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,0,1,fp8,fp8,0,0.13201066851615906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,128,1,float16,float16,0,0.08665066957473755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,0,1,float16,float16,0,0.13979199528694153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,128,1,float16,fp8,0,0.0867146650950114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,128,1,fp8,fp8,0,0.08468799789746602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,0,1,float16,fp8,0,0.13800000150998434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,0,1,fp8,fp8,0,0.13185600439707437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,128,1,float16,float16,0,0.08713600039482117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,0,1,float16,float16,0,0.13795733451843262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,128,1,float16,fp8,0,0.08788800239562988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,128,1,fp8,fp8,0,0.08469333251317342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,0,1,float16,fp8,0,0.13833066821098328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,128,1,float16,float16,0,0.08669867118199666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,0,1,fp8,fp8,0,0.13090667128562927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,0,1,float16,float16,0,0.1379039982954661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,128,1,float16,fp8,0,0.08835732936859131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,128,1,fp8,fp8,0,0.08426133791605632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,0,1,float16,fp8,0,0.13804266850153604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,0,1,fp8,fp8,0,0.13074666261672974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,128,1,float16,float16,0,0.08678399523099263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,128,1,float16,fp8,0,0.08699199557304382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,0,1,float16,float16,0,0.1381600002447764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,128,1,fp8,fp8,0,0.0830506682395935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,0,1,float16,fp8,0,0.13937600453694662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,0,1,fp8,fp8,0,0.12980799873669943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,128,1,float16,float16,0,1.669904073079427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,128,1,float16,fp8,0,1.6728639602661133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,0,1,float16,float16,0,2.496607939402262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,128,1,fp8,fp8,0,1.5278132756551106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,0,1,float16,fp8,0,2.502021312713623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,0,1,fp8,fp8,0,2.293776035308838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,128,1,float16,float16,0,1.6899199485778809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,128,1,float16,fp8,0,1.6936853726704915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,128,1,fp8,fp8,0,1.5462719599405925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,0,1,float16,float16,0,2.5189812978108725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,128,1,float16,float16,0,1.7230346997578938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,0,1,float16,fp8,0,2.5218559900919595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,0,1,fp8,fp8,0,2.309744040171305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,0,1,float16,float16,0,2.5550986925760903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,128,1,float16,fp8,0,1.7186506589253743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,128,1,fp8,fp8,0,1.568511962890625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,0,1,float16,fp8,0,2.5502880414326987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,0,1,fp8,fp8,0,2.3310933113098145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,128,1,float16,float16,0,1.7624425888061523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,0,1,float16,float16,0,2.5934720039367676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,128,1,float16,fp8,0,1.751599947611491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,128,1,fp8,fp8,0,1.6040107409159343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,128,1,float16,float16,0,0.9610293706258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,0,1,float16,fp8,0,2.586479981740316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,0,1,float16,float16,0,1.3905493418375652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,128,1,float16,fp8,0,0.974069356918335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,0,1,fp8,fp8,0,2.3684800465901694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,128,1,fp8,fp8,0,0.9224426746368408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,0,1,float16,fp8,0,1.4062453905741374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,128,1,float16,float16,0,0.8309546311696371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,0,1,fp8,fp8,0,1.311520020167033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,0,1,float16,float16,0,1.2511680126190186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,128,1,float16,fp8,0,0.8389493624369303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,128,1,fp8,fp8,0,0.7734560171763102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,0,1,float16,fp8,0,1.257472038269043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,128,1,float16,float16,0,0.8391093413035074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,0,1,fp8,fp8,0,1.1595040162404378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,0,1,float16,float16,0,1.2594613234202068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,128,1,fp8,fp8,0,0.7817333539326986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,128,1,float16,fp8,0,0.8452640374501547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,0,1,float16,fp8,0,1.2665973504384358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,0,1,fp8,fp8,0,1.1701599756876628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,128,1,float16,float16,0,0.8475946585337321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,0,1,float16,float16,0,1.2676640351613362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,128,1,float16,fp8,0,0.8560640017191569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,128,1,fp8,fp8,0,0.7912000020345052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,0,1,float16,fp8,0,1.2776906490325928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,128,1,float16,float16,0,0.8630239963531494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,0,1,fp8,fp8,0,1.1776266892751057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,0,1,float16,float16,0,1.2842346827189128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,128,1,float16,fp8,0,0.8719200293223063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,128,1,fp8,fp8,0,0.8071733315785726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,128,1,float16,float16,0,0.49297598997751874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,0,1,float16,fp8,0,1.292197306950887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,0,1,float16,float16,0,0.7124853134155273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,0,1,fp8,fp8,0,1.1953333218892415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,128,1,float16,fp8,0,0.5020853281021118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,128,1,fp8,fp8,0,0.4744266668955485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,0,1,float16,fp8,0,0.7220533688863119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,128,1,float16,float16,0,0.427349328994751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,0,1,fp8,fp8,0,0.674021323521932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,0,1,float16,float16,0,0.6437973181406657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,128,1,float16,fp8,0,0.4307146469751994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,128,1,fp8,fp8,0,0.40165332953135174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,0,1,float16,fp8,0,0.6478240092595419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,0,1,fp8,fp8,0,0.6012053489685059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,128,1,float16,float16,0,0.43359466393788654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,0,1,float16,float16,0,0.6495466629664103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,128,1,float16,fp8,0,0.43646399180094403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,128,1,fp8,fp8,0,0.4079039891560872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,0,1,float16,fp8,0,0.6519893407821655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,0,1,fp8,fp8,0,0.6059679985046387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,128,1,float16,float16,0,0.4377280076344808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,0,1,float16,float16,0,0.6523893276850382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,128,1,float16,fp8,0,0.4416693449020386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,128,1,fp8,fp8,0,0.4111200173695882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,0,1,float16,fp8,0,0.6586986780166626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,0,1,fp8,fp8,0,0.6093226671218872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,128,1,float16,float16,0,0.4450186491012573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,0,1,float16,float16,0,0.662831981976827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,128,1,float16,fp8,0,0.45021335283915204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,128,1,fp8,fp8,0,0.41969601313273114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,0,1,float16,fp8,0,0.6654880046844482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,128,1,float16,float16,0,0.26053333282470703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,0,1,float16,float16,0,0.3778719902038574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,0,1,fp8,fp8,0,0.6182933251063029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,128,1,fp8,fp8,0,0.25277332464853924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,128,1,float16,fp8,0,0.265664001305898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,128,1,float16,float16,0,0.22461867332458496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,0,1,fp8,fp8,0,0.3572479883829753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,0,1,float16,fp8,0,0.38202667236328125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,0,1,float16,float16,0,0.3370506763458252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,128,1,fp8,fp8,0,0.21605332692464194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,128,1,float16,fp8,0,0.22604266802469888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,0,1,float16,fp8,0,0.3387306531270345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,128,1,float16,float16,0,0.22604266802469888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,0,1,fp8,fp8,0,0.32021333773930866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,0,1,float16,float16,0,0.3383839925130208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,128,1,float16,fp8,0,0.22799466053644815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,128,1,fp8,fp8,0,0.21755733092625937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,0,1,float16,fp8,0,0.34118398030598956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,128,1,float16,float16,0,0.22882666190465292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,0,1,fp8,fp8,0,0.3223573366800944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,0,1,float16,float16,0,0.3436266581217448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,128,1,fp8,fp8,0,0.22046933571497598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,128,1,float16,fp8,0,0.2306613326072693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,128,1,float16,float16,0,0.23507199684778848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,0,1,float16,fp8,0,0.34517331918080646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,0,1,fp8,fp8,0,0.3248533407847087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,128,1,fp8,fp8,0,0.2257173260052999
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,128,1,float16,fp8,0,0.23884799083073935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,0,1,float16,float16,0,0.34924264748891193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,128,1,float16,float16,0,0.14377599954605103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,0,1,fp8,fp8,0,0.3306186596552531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,0,1,float16,fp8,0,0.35290666421254474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,0,1,float16,float16,0,0.20915200312932333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,128,1,float16,fp8,0,0.14602667093276978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,128,1,fp8,fp8,0,0.1426293353239695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,0,1,float16,fp8,0,0.211626668771108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,128,1,float16,float16,0,0.12185066938400269
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,0,1,fp8,fp8,0,0.19950934251149496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,128,1,float16,fp8,0,0.12352533141771953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,0,1,float16,float16,0,0.18638400236765543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,128,1,fp8,fp8,0,0.11468799908955891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,0,1,float16,fp8,0,0.18733332554499307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,128,1,float16,float16,0,0.12161067128181458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,0,1,fp8,fp8,0,0.17306133111317953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,128,1,float16,fp8,0,0.12378133336702983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,128,1,fp8,fp8,0,0.11585600177447002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,0,1,float16,float16,0,0.18547199169794717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,0,1,float16,fp8,0,0.1875306765238444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,128,1,float16,float16,0,0.12196266651153564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,0,1,fp8,fp8,0,0.1758506695429484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,128,1,float16,fp8,0,0.12361066540082295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,128,1,fp8,fp8,0,0.11973866820335388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,0,1,float16,float16,0,0.18659732739130655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,128,1,float16,float16,0,0.12588266531626383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,0,1,float16,fp8,0,0.18787733713785806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,0,1,fp8,fp8,0,0.17798399925231934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,128,1,float16,fp8,0,0.1293226679166158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,128,1,fp8,fp8,0,0.12597866853078207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,0,1,float16,float16,0,0.1914773384730021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,128,1,float16,float16,0,0.08366933465003967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,0,1,float16,fp8,0,0.19270400206247965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,0,1,fp8,fp8,0,0.18422933419545492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,0,1,float16,float16,0,0.11885333061218262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,128,1,float16,fp8,0,0.08691199620564778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,128,1,fp8,fp8,0,0.08684800068537395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,0,1,float16,fp8,0,0.12190399567286174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,128,1,float16,float16,0,0.0795360008875529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,0,1,fp8,fp8,0,0.11979200442632039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,128,1,float16,fp8,0,0.07867200175921123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,0,1,float16,float16,0,0.11385066310564677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,128,1,fp8,fp8,0,0.07225066423416138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,0,1,float16,fp8,0,0.11353066563606262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,0,1,fp8,fp8,0,0.10684800148010254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,128,1,float16,float16,0,0.07881600161393483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,0,1,float16,float16,0,0.11169067025184631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,128,1,fp8,fp8,0,0.0746559997399648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,0,1,float16,fp8,0,0.11489066481590271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,128,1,float16,fp8,0,0.08066133161385854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,0,1,fp8,fp8,0,0.10637332995732625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,128,1,float16,float16,0,0.07704000174999237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,128,1,float16,fp8,0,0.0786293347676595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,0,1,float16,float16,0,0.11347200473149617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,128,1,fp8,fp8,0,0.07551466425259908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,0,1,float16,fp8,0,0.11373866597811381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,128,1,float16,float16,0,0.07849599917729695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,0,1,fp8,fp8,0,0.10757866501808167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,0,1,float16,float16,0,0.11411733428637187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,128,1,float16,fp8,0,0.08069866895675659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,128,1,fp8,fp8,0,0.07657066484292348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,0,1,float16,fp8,0,0.11563199758529663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,128,1,float16,float16,0,0.06374399860699971
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,0,1,fp8,fp8,0,0.10827199618021648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,0,1,float16,float16,0,0.08914132912953694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,128,1,float16,fp8,0,0.06300266583760579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,128,1,fp8,fp8,0,0.06197333335876465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,0,1,float16,fp8,0,0.08905599514643352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,0,1,fp8,fp8,0,0.08292800188064575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,128,1,float16,float16,0,0.06229333579540253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,0,1,float16,float16,0,0.08892800410588582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,128,1,float16,fp8,0,0.062314664324124656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,128,1,fp8,fp8,0,0.06020799775918325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,0,1,float16,fp8,0,0.08700266480445862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,0,1,fp8,fp8,0,0.08330666522185008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,128,1,float16,float16,0,0.06324266890684764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,0,1,float16,float16,0,0.08731733759244283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,128,1,float16,fp8,0,0.0622026671965917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,128,1,fp8,fp8,0,0.06020799775918325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,0,1,fp8,fp8,0,0.08418666323026021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,128,1,float16,float16,0,0.06217599908510844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,0,1,float16,fp8,0,0.08918399612108867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,0,1,float16,float16,0,0.08896000186602275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,128,1,float16,fp8,0,0.06398933132489522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,128,1,fp8,fp8,0,0.060266668597857155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,0,1,float16,fp8,0,0.0888853371143341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,0,1,fp8,fp8,0,0.0845973292986552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,128,1,float16,float16,0,0.06275199850400288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,0,1,float16,float16,0,0.08709333340326945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,128,1,float16,fp8,0,0.06229866544405619
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,128,1,fp8,fp8,0,0.06047999858856201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,0,1,float16,fp8,0,0.08707200487454732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,0,1,fp8,fp8,0,0.08298133313655853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,128,1,float16,float16,0,1.2505333423614502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,128,1,float16,fp8,0,1.2589173316955566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,0,1,float16,float16,0,1.6600213050842285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,128,1,fp8,fp8,0,1.1542719999949138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,0,1,float16,fp8,0,1.6683786710103352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,0,1,fp8,fp8,0,1.5311466852823894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,128,1,float16,float16,0,1.2784852981567383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,128,1,float16,fp8,0,1.2803839842478435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,0,1,float16,float16,0,1.6921280225118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,128,1,fp8,fp8,0,1.1702666282653809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,0,1,fp8,fp8,0,1.5490187009175618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,0,1,float16,fp8,0,1.6908000310262044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,128,1,float16,float16,0,1.2920373280843098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,0,1,float16,float16,0,1.703861395517985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,128,1,fp8,fp8,0,1.1821973323822021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,128,1,float16,fp8,0,1.2918826738993328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,0,1,float16,fp8,0,1.6997493108113606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,128,1,float16,float16,0,1.3152533372243245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,0,1,fp8,fp8,0,1.5594293276468914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,0,1,float16,float16,0,1.729413350423177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,128,1,float16,fp8,0,1.3160053094228108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,128,1,fp8,fp8,0,1.2115946610768635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,128,1,float16,float16,0,0.7304159800211588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,0,1,float16,fp8,0,1.7290612856547039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,0,1,fp8,fp8,0,1.590234597524007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,0,1,float16,float16,0,0.9478826522827148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,128,1,float16,fp8,0,0.7368053595225016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,128,1,fp8,fp8,0,0.6952319939931234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,0,1,float16,fp8,0,0.9538773695627848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,128,1,float16,float16,0,0.6276959975560507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,0,1,fp8,fp8,0,0.8904533386230469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,0,1,float16,float16,0,0.8355573018391927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,128,1,float16,fp8,0,0.6340533494949341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,128,1,fp8,fp8,0,0.5839626789093018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,0,1,float16,fp8,0,0.84279998143514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,0,1,fp8,fp8,0,0.775989294052124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,128,1,float16,float16,0,0.6375786860783895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,0,1,float16,float16,0,0.8432426452636719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,128,1,float16,fp8,0,0.6399840116500854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,128,1,fp8,fp8,0,0.5924426714579264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,0,1,float16,fp8,0,0.8506293296813965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,0,1,fp8,fp8,0,0.7832960287729899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,128,1,float16,float16,0,0.6401386658350626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,0,1,float16,float16,0,0.8505226771036783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,128,1,float16,fp8,0,0.6478240092595419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,128,1,fp8,fp8,0,0.5993333260218302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,0,1,float16,fp8,0,0.8578506310780843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,128,1,float16,float16,0,0.653493324915568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,0,1,fp8,fp8,0,0.7903839747111002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,0,1,float16,float16,0,0.8642666339874268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,128,1,float16,fp8,0,0.6603253285090128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,128,1,fp8,fp8,0,0.6127520004908243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,128,1,float16,float16,0,0.3758080005645752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,0,1,float16,fp8,0,0.8702613512674967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,0,1,fp8,fp8,0,0.8049973646799723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,0,1,float16,float16,0,0.48975467681884766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,128,1,float16,fp8,0,0.38184531529744464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,128,1,fp8,fp8,0,0.36135466893513996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,0,1,fp8,fp8,0,0.46342400709788006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,0,1,float16,fp8,0,0.4939093192418416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,128,1,float16,float16,0,0.3232106765111287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,0,1,float16,float16,0,0.4310506582260132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,128,1,float16,fp8,0,0.3262186646461487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,128,1,fp8,fp8,0,0.3064533273379008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,0,1,float16,fp8,0,0.43538133303324383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,0,1,fp8,fp8,0,0.40536534786224365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,128,1,float16,float16,0,0.3255893389383952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,0,1,float16,float16,0,0.43376000722249347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,128,1,float16,fp8,0,0.3291520078976949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,128,1,fp8,fp8,0,0.3102133274078369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,0,1,float16,fp8,0,0.43647468090057373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,128,1,float16,float16,0,0.3322346607844035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,0,1,fp8,fp8,0,0.4087839921315511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,0,1,float16,float16,0,0.4399679899215698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,128,1,fp8,fp8,0,0.3141653339068095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,128,1,float16,fp8,0,0.3349173466364543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,0,1,float16,fp8,0,0.4419306516647339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,0,1,fp8,fp8,0,0.41258132457733154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,128,1,float16,float16,0,0.3389066855112712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,128,1,float16,fp8,0,0.3421226739883423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,0,1,float16,float16,0,0.4494239886601766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,128,1,fp8,fp8,0,0.3198453386624654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,128,1,float16,float16,0,0.20075732469558716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,0,1,float16,fp8,0,0.4523306687672933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,0,1,float16,float16,0,0.26016000906626385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,0,1,fp8,fp8,0,0.4203946590423584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,128,1,float16,fp8,0,0.20403732856114706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,128,1,fp8,fp8,0,0.1946613391240438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,0,1,float16,fp8,0,0.26414400339126587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,0,1,fp8,fp8,0,0.2485919992129008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,128,1,float16,float16,0,0.16857600212097168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,0,1,float16,float16,0,0.22749332586924234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,128,1,float16,fp8,0,0.170799990495046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,128,1,fp8,fp8,0,0.16486933827400208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,0,1,float16,fp8,0,0.22858132918675741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,0,1,fp8,fp8,0,0.21837866306304932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,128,1,float16,float16,0,0.169487992922465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,128,1,float16,fp8,0,0.1729653278986613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,128,1,fp8,fp8,0,0.16679465770721436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,0,1,float16,float16,0,0.22689600785573324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,128,1,float16,float16,0,0.17185600598653158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,0,1,float16,fp8,0,0.23057067394256592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,0,1,fp8,fp8,0,0.22004799048105875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,128,1,float16,fp8,0,0.17528533935546875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,128,1,fp8,fp8,0,0.1704960068066915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,0,1,float16,float16,0,0.22957332928975424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,0,1,fp8,fp8,0,0.22386133670806885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,0,1,float16,fp8,0,0.2323946754137675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,128,1,float16,float16,0,0.17740267515182495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,128,1,float16,fp8,0,0.18041600783665976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,128,1,fp8,fp8,0,0.17463467518488565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,0,1,float16,float16,0,0.2355626622835795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,128,1,float16,float16,0,0.10991467038790385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,0,1,float16,fp8,0,0.2386666735013326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,0,1,fp8,fp8,0,0.22663466135660806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,0,1,float16,float16,0,0.14272532860438028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,128,1,float16,fp8,0,0.1135040024916331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,128,1,fp8,fp8,0,0.1125973363717397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,0,1,float16,fp8,0,0.14661866426467896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,128,1,float16,float16,0,0.09706667065620422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,0,1,fp8,fp8,0,0.142085333665212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,128,1,float16,fp8,0,0.09753066301345825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,128,1,fp8,fp8,0,0.09090666969617207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,0,1,float16,float16,0,0.1283253331979116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,0,1,float16,fp8,0,0.13057600458463034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,0,1,fp8,fp8,0,0.11971732974052429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,128,1,float16,float16,0,0.09723732868830363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,128,1,float16,fp8,0,0.09698667128880818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,128,1,fp8,fp8,0,0.09129599730173747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,0,1,float16,float16,0,0.12812800208727518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,0,1,float16,fp8,0,0.12965866923332214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,128,1,float16,float16,0,0.09712533156077068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,0,1,fp8,fp8,0,0.12034133076667786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,128,1,float16,fp8,0,0.09753599762916565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,0,1,float16,float16,0,0.12793599565823874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,128,1,fp8,fp8,0,0.09139733513196309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,0,1,float16,fp8,0,0.1305333375930786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,0,1,fp8,fp8,0,0.12149332960446675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,128,1,float16,float16,0,0.09934399525324504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,128,1,fp8,fp8,0,0.09506133198738098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,128,1,float16,fp8,0,0.10134933392206828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,0,1,float16,float16,0,0.13194132844607034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,128,1,float16,float16,0,0.06619733572006226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,0,1,float16,fp8,0,0.13170133034388223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,0,1,fp8,fp8,0,0.1253653367360433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,0,1,float16,float16,0,0.0869706670443217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,128,1,float16,fp8,0,0.06818133095900218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,128,1,fp8,fp8,0,0.06605333089828491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,0,1,float16,fp8,0,0.08911466598510742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,0,1,fp8,fp8,0,0.08493866523106892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,128,1,float16,float16,0,0.062047998110453285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,0,1,float16,float16,0,0.08293866614500682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,128,1,float16,fp8,0,0.06228800117969513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,128,1,fp8,fp8,0,0.05834666887919108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,0,1,float16,fp8,0,0.08274133503437042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,0,1,fp8,fp8,0,0.07869333525498708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,128,1,float16,float16,0,0.06241066753864288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,128,1,float16,fp8,0,0.06205866734186808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,0,1,float16,float16,0,0.08286933104197185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,128,1,fp8,fp8,0,0.06002666552861532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,128,1,float16,float16,0,0.06217066446940104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,0,1,fp8,fp8,0,0.07691200077533722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,0,1,float16,float16,0,0.08281599978605907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,0,1,float16,fp8,0,0.08353066444396973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,128,1,float16,fp8,0,0.06198399762312571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,128,1,fp8,fp8,0,0.058362667759259544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,0,1,float16,fp8,0,0.08418132861455281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,0,1,fp8,fp8,0,0.07900799810886383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,128,1,float16,float16,0,0.06216000020503998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,0,1,float16,float16,0,0.08288000027338664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,128,1,float16,fp8,0,0.0625546673933665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,128,1,fp8,fp8,0,0.060880000392595925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,0,1,float16,fp8,0,0.08461333314577739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,0,1,fp8,fp8,0,0.07878933350245158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,128,1,float16,float16,0,0.05234666665395101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,128,1,float16,fp8,0,0.05420266588528951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,128,1,fp8,fp8,0,0.051632001996040344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,0,1,float16,float16,0,0.06645333270231883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,0,1,float16,fp8,0,0.06478400031725566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,0,1,fp8,fp8,0,0.06250666578610738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,128,1,float16,float16,0,0.05402666827042898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,0,1,float16,float16,0,0.06488533318042755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,128,1,float16,fp8,0,0.052101333936055504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,128,1,fp8,fp8,0,0.05056533217430115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,0,1,float16,fp8,0,0.06623466809590657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,128,1,float16,float16,0,0.053818667928377785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,0,1,fp8,fp8,0,0.0629013329744339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,0,1,float16,float16,0,0.06631466746330261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,128,1,float16,fp8,0,0.054378668467203774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,128,1,fp8,fp8,0,0.05194133520126343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,0,1,float16,fp8,0,0.06689600149790446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,0,1,fp8,fp8,0,0.06259199976921082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,128,1,float16,float16,0,0.052095999320348106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,0,1,float16,float16,0,0.06675200164318085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,128,1,float16,fp8,0,0.053898667295773826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,128,1,fp8,fp8,0,0.050053333242734276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,0,1,float16,fp8,0,0.06676266590754192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,0,1,fp8,fp8,0,0.062314664324124656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,128,1,float16,float16,0,0.053818667928377785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,128,1,float16,fp8,0,0.05381333331267039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,0,1,float16,float16,0,0.06644799808661143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,128,1,fp8,fp8,0,0.052058666944503784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,0,1,float16,fp8,0,0.06559466818968455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,0,1,fp8,fp8,0,0.0625493327776591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,128,1,float16,float16,0,1.5217386881510417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,128,1,float16,fp8,0,1.5128746032714844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,0,1,float16,float16,0,1.7923572858174641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,128,1,fp8,fp8,0,1.4609333674112956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,0,1,float16,fp8,0,1.7805760701497395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,0,1,fp8,fp8,0,1.7042880058288574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,128,1,float16,float16,0,1.5337279637654622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,128,1,float16,fp8,0,1.5329173405965169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,0,1,float16,float16,0,1.8049972852071126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,128,1,fp8,fp8,0,1.4344479242960613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,0,1,fp8,fp8,0,1.6962560017903645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,128,1,float16,float16,0,1.577562650044759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,0,1,float16,fp8,0,1.7942293485005696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,0,1,float16,float16,0,1.837056001027425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,128,1,float16,fp8,0,1.5691146850585938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,128,1,fp8,fp8,0,1.6707946459452312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,0,1,float16,fp8,0,1.8246026039123535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,0,1,fp8,fp8,0,1.9174079895019531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,128,1,float16,float16,0,1.550495942433675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,0,1,float16,float16,0,1.8370347023010254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,128,1,float16,fp8,0,1.5237280527750652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,128,1,float16,float16,0,0.7944160302480062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,128,1,fp8,fp8,0,1.6511467297871907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,0,1,float16,fp8,0,1.7995893160502117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,0,1,fp8,fp8,0,1.910352071126302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,0,1,float16,float16,0,0.9390133221944174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,128,1,float16,fp8,0,0.7805386384328207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,128,1,fp8,fp8,0,0.8162453174591064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,128,1,float16,float16,0,0.771621306737264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,0,1,float16,fp8,0,0.9290773073832194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,0,1,fp8,fp8,0,0.9389013449350992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,0,1,float16,float16,0,0.9080586433410645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,128,1,float16,fp8,0,0.7650612990061442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,128,1,fp8,fp8,0,0.7196799914042155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,0,1,float16,fp8,0,0.8998613357543945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,0,1,fp8,fp8,0,0.8452800114949545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,128,1,float16,float16,0,0.7778773307800293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,0,1,float16,float16,0,0.9179573059082031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,128,1,float16,fp8,0,0.7734933694203695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,128,1,fp8,fp8,0,0.7215200265248617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,0,1,float16,fp8,0,0.9095253149668375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,0,1,fp8,fp8,0,0.8462186654408773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,128,1,float16,float16,0,0.7908266385396322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,0,1,float16,float16,0,0.9319893519083658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,128,1,float16,fp8,0,0.7851626873016357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,128,1,fp8,fp8,0,0.8250186443328857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,0,1,float16,fp8,0,0.9224373499552408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,0,1,fp8,fp8,0,0.9513653119405111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,128,1,float16,float16,0,0.776629368464152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,0,1,float16,float16,0,0.9138186772664388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,128,1,float16,fp8,0,0.7665759722391764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,128,1,fp8,fp8,0,0.8207573095957438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,128,1,float16,float16,0,0.4084959824879964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,0,1,float16,fp8,0,0.9032266934712728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,0,1,float16,float16,0,0.482965350151062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,0,1,fp8,fp8,0,0.9482026894887289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,128,1,float16,fp8,0,0.40061867237091064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,128,1,fp8,fp8,0,0.4113653500874837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,0,1,float16,fp8,0,0.475877324740092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,128,1,float16,float16,0,0.39370667934417725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,0,1,fp8,fp8,0,0.47497065862019855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,0,1,float16,float16,0,0.4647200107574463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,128,1,float16,fp8,0,0.3923413356145223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,128,1,fp8,fp8,0,0.3678026596705119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,0,1,float16,fp8,0,0.4638400077819824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,0,1,fp8,fp8,0,0.43164265155792236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,128,1,float16,float16,0,0.3997386693954468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,0,1,float16,float16,0,0.46981334686279297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,128,1,float16,fp8,0,0.3981066544850667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,128,1,fp8,fp8,0,0.3696746826171875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,0,1,float16,fp8,0,0.46931199232737225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,0,1,fp8,fp8,0,0.43329068024953205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,128,1,float16,float16,0,0.40675199031829834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,0,1,float16,float16,0,0.47780799865722656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,128,1,float16,fp8,0,0.40557865301767987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,128,1,fp8,fp8,0,0.40212265650431317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,0,1,float16,fp8,0,0.4768799940745036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,0,1,fp8,fp8,0,0.465989351272583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,128,1,float16,float16,0,0.3971946636835734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,0,1,float16,float16,0,0.4689120054244995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,128,1,float16,fp8,0,0.3930186827977498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,128,1,fp8,fp8,0,0.39643200238545734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,128,1,float16,float16,0,0.21502399444580078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,0,1,float16,fp8,0,0.4641066789627075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,0,1,fp8,fp8,0,0.46241601308186847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,0,1,float16,float16,0,0.2528266708056132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,128,1,float16,fp8,0,0.21214399735132852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,128,1,fp8,fp8,0,0.2153759996096293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,0,1,float16,fp8,0,0.2491146723429362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,0,1,fp8,fp8,0,0.24935466051101685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,128,1,float16,float16,0,0.20549333095550537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,0,1,float16,float16,0,0.2428213357925415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,128,1,float16,fp8,0,0.20548266172409058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,128,1,fp8,fp8,0,0.19312000274658203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,0,1,float16,fp8,0,0.2411200006802877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,0,1,fp8,fp8,0,0.22663466135660806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,128,1,float16,float16,0,0.2092746694882711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,128,1,float16,fp8,0,0.20769067605336508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,128,1,fp8,fp8,0,0.19401599963506064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,0,1,float16,float16,0,0.24555200338363647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,128,1,float16,float16,0,0.21253865957260132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,0,1,fp8,fp8,0,0.22753065824508667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,0,1,float16,fp8,0,0.24574400981267294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,128,1,float16,fp8,0,0.2116960088411967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,128,1,fp8,fp8,0,0.20632533232371011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,0,1,float16,float16,0,0.24858667453130087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,0,1,float16,fp8,0,0.24843200047810873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,128,1,float16,float16,0,0.2082080046335856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,0,1,fp8,fp8,0,0.23969600598017374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,128,1,float16,fp8,0,0.20653865734736124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,128,1,fp8,fp8,0,0.20563199122746786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,0,1,float16,float16,0,0.2458826700846354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,128,1,float16,float16,0,0.11878933509190877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,0,1,float16,fp8,0,0.2445546587308248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,0,1,fp8,fp8,0,0.23865600426991782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,0,1,float16,float16,0,0.14148799578348795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,128,1,float16,fp8,0,0.11699199676513672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,128,1,fp8,fp8,0,0.11833600203196208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,0,1,float16,fp8,0,0.14070933063824972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,0,1,fp8,fp8,0,0.13801599542299905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,128,1,float16,float16,0,0.10931733250617981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,0,1,float16,float16,0,0.13211199641227722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,128,1,float16,fp8,0,0.10941867033640544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,128,1,fp8,fp8,0,0.10340799887975057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,0,1,float16,fp8,0,0.13078932960828146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,0,1,fp8,fp8,0,0.12154666582743327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,128,1,float16,float16,0,0.11174933115641277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,0,1,float16,float16,0,0.1338933308919271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,128,1,float16,fp8,0,0.11087999741236369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,128,1,fp8,fp8,0,0.1055573324362437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,0,1,float16,fp8,0,0.13294399778048197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,0,1,fp8,fp8,0,0.12459733088811238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,128,1,float16,float16,0,0.11345066626866658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,128,1,float16,fp8,0,0.11351466178894043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,0,1,float16,float16,0,0.1353333294391632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,128,1,fp8,fp8,0,0.11081600189208984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,0,1,float16,fp8,0,0.13524799545605978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,0,1,fp8,fp8,0,0.12923733393351236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,128,1,float16,float16,0,0.11130133271217346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,128,1,float16,fp8,0,0.10988799730936687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,128,1,fp8,fp8,0,0.11095466216405232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,0,1,float16,float16,0,0.13422933220863342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,128,1,float16,float16,0,0.06422933439413707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,0,1,float16,fp8,0,0.1320746640364329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,0,1,fp8,fp8,0,0.13037866353988647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,0,1,float16,float16,0,0.07693333427111308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,128,1,float16,fp8,0,0.06622933348019917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,128,1,fp8,fp8,0,0.06788266698519389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,0,1,float16,fp8,0,0.0764160007238388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,0,1,fp8,fp8,0,0.07934399942557017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,128,1,float16,float16,0,0.06234133243560791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,0,1,float16,float16,0,0.07607999940713246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,128,1,float16,fp8,0,0.06214933097362518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,128,1,fp8,fp8,0,0.06033066908518473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,0,1,float16,fp8,0,0.0766293356815974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,128,1,float16,float16,0,0.06409599880377452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,0,1,fp8,fp8,0,0.07165866593519847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,0,1,float16,float16,0,0.07684800028800964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,128,1,float16,fp8,0,0.06407466530799866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,128,1,fp8,fp8,0,0.06195733447869619
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,0,1,fp8,fp8,0,0.07257600128650665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,128,1,float16,float16,0,0.06579733391602834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,0,1,float16,fp8,0,0.07693866888682048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,0,1,float16,float16,0,0.07645866771539052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,128,1,float16,fp8,0,0.06550933420658112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,128,1,fp8,fp8,0,0.06386666496594746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,0,1,float16,fp8,0,0.07787199815114339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,0,1,fp8,fp8,0,0.0737066666285197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,128,1,float16,float16,0,0.06449066599210103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,0,1,float16,float16,0,0.0755680004755656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,128,1,float16,fp8,0,0.0647680014371872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,128,1,fp8,fp8,0,0.06448533137639363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,0,1,float16,fp8,0,0.07657599945863088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,0,1,fp8,fp8,0,0.07470933099587758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,128,1,float16,float16,0,0.04186666508515676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,0,1,float16,float16,0,0.050554667909940086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,128,1,float16,fp8,0,0.040175999204317726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,128,1,fp8,fp8,0,0.04148799926042557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,0,1,float16,fp8,0,0.0498986691236496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,0,1,fp8,fp8,0,0.049600000182787575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,128,1,float16,float16,0,0.039850667119026184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,0,1,float16,float16,0,0.04980800052483877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,128,1,float16,fp8,0,0.03959999978542328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,128,1,fp8,fp8,0,0.03939199944337209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,0,1,float16,fp8,0,0.04969066878159841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,0,1,fp8,fp8,0,0.04782933493455251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,128,1,float16,float16,0,0.03957333415746689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,0,1,float16,float16,0,0.0499946673711141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,128,1,float16,fp8,0,0.041562666495641075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,128,1,fp8,fp8,0,0.039349332451820374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,0,1,float16,fp8,0,0.050154666105906166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,0,1,fp8,fp8,0,0.04829333225886027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,128,1,float16,float16,0,0.04142933338880539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,0,1,float16,float16,0,0.04990399877230326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,128,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,128,1,fp8,fp8,0,0.041573333243529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,0,1,float16,fp8,0,0.05072533090909322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,0,1,fp8,fp8,0,0.04817600051561991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,128,1,float16,float16,0,0.03979733337958654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,0,1,float16,float16,0,0.050106664498647056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,128,1,float16,fp8,0,0.039994666973749794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,128,1,fp8,fp8,0,0.04058133314053217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,0,1,float16,fp8,0,0.0497920016447703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,0,1,fp8,fp8,0,0.04994133114814758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,128,1,float16,float16,0,0.02756800005833308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,0,1,float16,float16,0,0.03342933456103007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,128,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,128,1,fp8,fp8,0,0.027610667049884796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,0,1,float16,fp8,0,0.03366933266321818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,0,1,fp8,fp8,0,0.03311466674009959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,128,1,float16,float16,0,0.027189334233601887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,0,1,float16,float16,0,0.033530667424201965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,128,1,float16,fp8,0,0.026719999810059864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,128,1,fp8,fp8,0,0.027269333600997925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,0,1,float16,fp8,0,0.031850665807724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,0,1,fp8,fp8,0,0.03173333406448364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,128,1,float16,float16,0,0.02626666675011317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,128,1,float16,fp8,0,0.027461332579453785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,0,1,float16,float16,0,0.032245332996050514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,128,1,fp8,fp8,0,0.025759999950726826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,0,1,float16,fp8,0,0.033402666449546814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,0,1,fp8,fp8,0,0.03142933299144109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,128,1,float16,float16,0,0.027295999228954315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,0,1,float16,float16,0,0.03366400053103765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,128,1,float16,fp8,0,0.027210667729377747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,128,1,fp8,fp8,0,0.02717333287000656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,0,1,float16,fp8,0,0.03410666684309641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,0,1,fp8,fp8,0,0.03349866718053818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,128,1,float16,float16,0,0.027471999327341717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,0,1,float16,float16,0,0.03349866718053818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,128,1,float16,fp8,0,0.027621333797772724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,128,1,fp8,fp8,0,0.027615999182065327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,0,1,float16,fp8,0,0.033285332222779594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,0,1,fp8,fp8,0,0.03369600077470144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,128,1,float16,float16,0,1.4823733965555828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,128,1,float16,fp8,0,1.4672479629516602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,0,1,float16,float16,0,1.4999732971191406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,128,1,fp8,fp8,0,1.4129865964253743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,0,1,fp8,fp8,0,1.4280799229939778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,0,1,float16,fp8,0,1.4856746991475422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,0,1,float16,float16,0,1.517072041829427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,128,1,float16,float16,0,1.49125337600708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,128,1,fp8,fp8,0,1.3948480288187664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,128,1,float16,fp8,0,1.4875359535217285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,0,1,float16,fp8,0,1.5083306630452473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,0,1,fp8,fp8,0,1.428101380666097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,128,1,float16,float16,0,1.5266292889912922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,0,1,float16,float16,0,1.5550079345703125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,128,1,float16,fp8,0,1.527349313100179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,128,1,fp8,fp8,0,1.6299573580423992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,0,1,float16,fp8,0,1.5351093610127766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,0,1,fp8,fp8,0,1.6428052584330242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,128,1,float16,float16,0,1.5065706570943196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,128,1,float16,fp8,0,1.4856534004211426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,0,1,float16,float16,0,1.5401280721028645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,128,1,fp8,fp8,0,1.6240746180216472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,128,1,float16,float16,0,0.772869348526001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,0,1,float16,float16,0,0.7901973724365234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,0,1,float16,fp8,0,1.5069066683451335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,0,1,fp8,fp8,0,1.6315093040466309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,128,1,float16,fp8,0,0.7614026864369711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,128,1,fp8,fp8,0,0.7748640378316244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,0,1,float16,fp8,0,0.7755680084228516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,0,1,fp8,fp8,0,0.7835573355356852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,128,1,float16,float16,0,0.7494346300760905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,0,1,float16,float16,0,0.7608319918314616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,128,1,float16,fp8,0,0.7439839839935303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,128,1,fp8,fp8,0,0.6980000336964926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,0,1,float16,fp8,0,0.7534453074137369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,0,1,fp8,fp8,0,0.7074560324350992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,128,1,float16,float16,0,0.7591893672943115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,0,1,float16,float16,0,0.7673439979553223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,128,1,float16,fp8,0,0.7510026295979818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,128,1,fp8,fp8,0,0.7014933427174886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,0,1,float16,fp8,0,0.7652586301167806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,0,1,fp8,fp8,0,0.7069119612375895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,128,1,float16,float16,0,0.7692000071207682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,0,1,float16,float16,0,0.7825067043304443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,128,1,float16,fp8,0,0.7682240009307861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,128,1,fp8,fp8,0,0.8049866358439127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,0,1,float16,fp8,0,0.775445302327474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,0,1,fp8,fp8,0,0.816485325495402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,128,1,float16,float16,0,0.753162701924642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,0,1,float16,float16,0,0.7670079867045084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,128,1,float16,fp8,0,0.7446346282958984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,128,1,fp8,fp8,0,0.8076213200887045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,0,1,float16,fp8,0,0.7581493059794108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,128,1,float16,float16,0,0.3967573245366414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,0,1,fp8,fp8,0,0.8027466932932535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,0,1,float16,float16,0,0.4069226582845052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,128,1,float16,fp8,0,0.3922773202260335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,128,1,fp8,fp8,0,0.396506667137146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,0,1,float16,fp8,0,0.39818131923675537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,0,1,fp8,fp8,0,0.40276801586151123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,128,1,float16,float16,0,0.3830346663792928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,0,1,float16,float16,0,0.3901280164718628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,128,1,float16,fp8,0,0.3822346528371175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,128,1,fp8,fp8,0,0.35558398564656574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,0,1,float16,fp8,0,0.3883519967397054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,0,1,fp8,fp8,0,0.3608800172805786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,128,1,float16,float16,0,0.3879839976628621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,0,1,float16,float16,0,0.39502398173014325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,128,1,float16,fp8,0,0.3858453432718913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,128,1,fp8,fp8,0,0.3593653440475464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,0,1,float16,fp8,0,0.3922346830368042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,0,1,fp8,fp8,0,0.36371731758117676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,128,1,float16,float16,0,0.3957013289133708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,0,1,float16,float16,0,0.4027786652247111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,128,1,float16,fp8,0,0.3945866823196411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,128,1,fp8,fp8,0,0.39026133219401044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,0,1,float16,fp8,0,0.40030932426452637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,0,1,fp8,fp8,0,0.40001598993937176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,128,1,float16,float16,0,0.3859200080235799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,0,1,float16,float16,0,0.3925280173619588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,128,1,float16,fp8,0,0.3826613426208496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,128,1,fp8,fp8,0,0.3858613173166911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,0,1,float16,fp8,0,0.38950932025909424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,0,1,fp8,fp8,0,0.4007146755854289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,128,1,float16,float16,0,0.20774932702382407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,0,1,float16,float16,0,0.2125706672668457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,128,1,float16,fp8,0,0.2049013376235962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,128,1,fp8,fp8,0,0.20775999625523886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,0,1,float16,fp8,0,0.20962133010228476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,0,1,fp8,fp8,0,0.21063466866811117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,128,1,float16,float16,0,0.2002453406651815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,0,1,float16,float16,0,0.20482132832209268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,128,1,float16,fp8,0,0.19873599211374918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,128,1,fp8,fp8,0,0.18738667170206705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,0,1,float16,fp8,0,0.20195732514063516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,0,1,fp8,fp8,0,0.18943999210993448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,128,1,float16,float16,0,0.20226667324701944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,0,1,float16,float16,0,0.20784533023834229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,128,1,float16,fp8,0,0.20037867625554404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,128,1,fp8,fp8,0,0.1893440087636312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,0,1,fp8,fp8,0,0.19047999382019043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,128,1,float16,float16,0,0.20481600364049277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,0,1,float16,fp8,0,0.20402133464813232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,0,1,float16,float16,0,0.20989867051442465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,128,1,float16,fp8,0,0.20578134059906006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,128,1,fp8,fp8,0,0.2004959980646769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,0,1,float16,fp8,0,0.20864532391230264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,0,1,fp8,fp8,0,0.20261865854263306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,128,1,float16,float16,0,0.20398932695388794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,0,1,float16,float16,0,0.20544000466664633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,128,1,float16,fp8,0,0.2023893396059672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,128,1,fp8,fp8,0,0.19946134090423584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,0,1,float16,fp8,0,0.20492267608642578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,128,1,float16,float16,0,0.11571199695269267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,0,1,float16,float16,0,0.11889599760373433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,0,1,fp8,fp8,0,0.20172800620396933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,128,1,float16,fp8,0,0.11455999811490376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,128,1,fp8,fp8,0,0.11629866560300191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,0,1,float16,fp8,0,0.1153546671072642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,0,1,fp8,fp8,0,0.11755200227101643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,128,1,float16,float16,0,0.10877866546312968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,0,1,float16,float16,0,0.11014933387438457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,128,1,float16,fp8,0,0.10921066999435425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,128,1,fp8,fp8,0,0.10105599959691365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,0,1,float16,fp8,0,0.10941867033640544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,0,1,fp8,fp8,0,0.10281599561373393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,128,1,float16,float16,0,0.10956799983978271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,0,1,float16,float16,0,0.10991467038790385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,128,1,float16,fp8,0,0.10858666896820068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,128,1,fp8,fp8,0,0.10288533568382263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,0,1,float16,fp8,0,0.11001599828402202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,0,1,fp8,fp8,0,0.10314133763313293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,128,1,float16,float16,0,0.11178666353225708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,0,1,float16,float16,0,0.11361599961916606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,128,1,float16,fp8,0,0.1114453375339508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,128,1,fp8,fp8,0,0.10729066530863444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,0,1,float16,fp8,0,0.11361066500345866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,0,1,fp8,fp8,0,0.10962667067845662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,128,1,float16,float16,0,0.11035199960072835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,0,1,float16,float16,0,0.1114026705423991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,128,1,float16,fp8,0,0.10812800129254659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,128,1,fp8,fp8,0,0.10794666409492493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,0,1,float16,fp8,0,0.11170666416486104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,0,1,fp8,fp8,0,0.11096533139546712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,128,1,float16,float16,0,0.0637066662311554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,0,1,float16,float16,0,0.0641599992911021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,128,1,float16,fp8,0,0.06428800026575725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,128,1,fp8,fp8,0,0.0666240006685257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,0,1,float16,fp8,0,0.0634986658891042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,128,1,float16,float16,0,0.06267733375231425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,0,1,fp8,fp8,0,0.06648533542950948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,0,1,float16,float16,0,0.06250666578610738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,128,1,float16,fp8,0,0.06136533121267954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,128,1,fp8,fp8,0,0.05816533168156942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,0,1,float16,fp8,0,0.06218666831652323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,0,1,fp8,fp8,0,0.06012799839178721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,0,1,float16,float16,0,0.06250133117039998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,128,1,float16,float16,0,0.06252799928188324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,128,1,float16,fp8,0,0.06234133243560791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,128,1,fp8,fp8,0,0.06015466650327047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,0,1,float16,fp8,0,0.062368000547091164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,0,1,fp8,fp8,0,0.0602453351020813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,128,1,float16,float16,0,0.06400000055631001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,0,1,float16,float16,0,0.06444799900054932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,128,1,float16,fp8,0,0.06443200012048085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,128,1,fp8,fp8,0,0.060138667623202004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,0,1,float16,fp8,0,0.06486933430035909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,0,1,fp8,fp8,0,0.0613919993241628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,128,1,float16,float16,0,0.06204266846179962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,0,1,float16,float16,0,0.06313066681226094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,128,1,float16,fp8,0,0.062218666076660156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,128,1,fp8,fp8,0,0.06035733222961426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,0,1,float16,fp8,0,0.06224533418814341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,0,1,fp8,fp8,0,0.062208001812299095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,128,1,float16,float16,0,0.03949866692225138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,0,1,float16,float16,0,0.04165333261092504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,128,1,float16,fp8,0,0.039877332746982574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,128,1,fp8,fp8,0,0.04181866844495138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,0,1,float16,fp8,0,0.04177066683769226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,0,1,fp8,fp8,0,0.042591998974482216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,128,1,float16,float16,0,0.039887999494870506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,128,1,float16,fp8,0,0.04008533308903376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,0,1,float16,float16,0,0.04180799921353658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,128,1,fp8,fp8,0,0.03961600114901861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,0,1,float16,fp8,0,0.041562666495641075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,0,1,fp8,fp8,0,0.039733332892258964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,128,1,float16,float16,0,0.039701332648595176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,0,1,float16,float16,0,0.04161600023508072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,128,1,float16,fp8,0,0.03986666599909464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,128,1,fp8,fp8,0,0.038592000802357994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,0,1,float16,fp8,0,0.04152533411979675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,0,1,fp8,fp8,0,0.041706666350364685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,128,1,float16,float16,0,0.03965866565704346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,0,1,float16,float16,0,0.04234133164087931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,128,1,float16,fp8,0,0.041637333730856575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,128,1,fp8,fp8,0,0.039861333866914116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,0,1,float16,fp8,0,0.04214933514595032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,0,1,fp8,fp8,0,0.04165866722663244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,128,1,float16,float16,0,0.039450667798519135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,128,1,float16,fp8,0,0.04138133426507314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,0,1,float16,float16,0,0.04206933577855428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,128,1,fp8,fp8,0,0.04154666761557261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,128,1,float16,float16,0,0.027215999861558277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,0,1,fp8,fp8,0,0.04238933324813843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,0,1,float16,float16,0,0.027461332579453785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,128,1,float16,fp8,0,0.0273333340883255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,128,1,fp8,fp8,0,0.027701333165168762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,0,1,float16,fp8,0,0.02738133321205775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,0,1,fp8,fp8,0,0.02718399961789449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,128,1,float16,float16,0,0.025802666942278545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,0,1,float16,float16,0,0.02733866622050603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,128,1,float16,fp8,0,0.027493332823117573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,128,1,fp8,fp8,0,0.02610666553179423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,0,1,float16,fp8,0,0.027610667049884796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,0,1,fp8,fp8,0,0.02735466758410136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,128,1,float16,float16,0,0.026672000686327618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,128,1,float16,fp8,0,0.027136000494162243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,0,1,float16,fp8,0,0.027450665831565857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,128,1,fp8,fp8,0,0.02758399893840154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,0,1,fp8,fp8,0,0.027141332626342773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,128,1,float16,float16,0,0.02731200059254964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,0,1,float16,float16,0,0.027632000545660656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,128,1,float16,fp8,0,0.02740799884001414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,128,1,fp8,fp8,0,0.027589333554108936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,0,1,fp8,fp8,0,0.027189334233601887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,0,1,float16,fp8,0,0.028575999041398365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,128,1,float16,float16,0,0.0271573339899381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,0,1,float16,float16,0,0.027280000348885853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,128,1,float16,fp8,0,0.027690666417280834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,128,1,fp8,fp8,0,0.027263998985290527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,0,1,float16,fp8,0,0.027658666173617046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,0,1,fp8,fp8,0,0.0277813325325648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,128,1,float16,float16,0,0.02347733328739802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,0,1,float16,float16,0,0.023247999449570973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,128,1,float16,fp8,0,0.023669332265853882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,128,1,fp8,fp8,0,0.02312533309062322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,0,1,float16,fp8,0,0.024143998821576435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,0,1,fp8,fp8,0,0.023984000086784363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,128,1,float16,float16,0,0.02312533309062322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,0,1,float16,float16,0,0.02311466634273529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,128,1,float16,fp8,0,0.023567999402681988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,128,1,fp8,fp8,0,0.02126399924357732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,0,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,0,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,128,1,float16,float16,0,0.02316266546646754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,0,1,float16,float16,0,0.023168000082174938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,128,1,float16,fp8,0,0.023760000864664715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,128,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,0,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,0,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,128,1,float16,float16,0,0.02312533309062322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,128,1,float16,fp8,0,0.023018665611743927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,0,1,float16,float16,0,0.023376000424226124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,128,1,fp8,fp8,0,0.022111999491850536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,0,1,fp8,fp8,0,0.023546665906906128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,128,1,float16,float16,0,0.02332799881696701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,0,1,float16,float16,0,0.023152001202106476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,128,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,0,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,0,1,fp8,fp8,0,0.02365333338578542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,128,1,fp8,fp8,0,0.022085333863894146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,128,1,float16,float16,0,0.6991519927978516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,0,1,float16,float16,0,0.687280019124349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,128,1,float16,fp8,0,0.6959839661916097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,128,1,fp8,fp8,0,0.651253342628479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,0,1,float16,fp8,0,0.6790453592936198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,0,1,fp8,fp8,0,0.6376533508300781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,128,1,float16,float16,0,0.7085546652475992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,0,1,float16,float16,0,0.6935199896494547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,128,1,float16,fp8,0,0.7056799729665121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,128,1,fp8,fp8,0,0.6544533173243204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,0,1,float16,fp8,0,0.6901919841766357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,0,1,fp8,fp8,0,0.6426666577657064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,128,1,float16,float16,0,0.7211893399556478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,0,1,float16,float16,0,0.707482655843099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,128,1,float16,fp8,0,0.7180906931559244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,128,1,fp8,fp8,0,0.7478559811909994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,0,1,float16,fp8,0,0.7047039667765299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,0,1,fp8,fp8,0,0.7297813097635905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,128,1,float16,float16,0,0.7096052964528402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,0,1,float16,float16,0,0.6905226707458496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,128,1,float16,fp8,0,0.6973013083140055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,128,1,fp8,fp8,0,0.747770627339681
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,128,1,float16,float16,0,0.3692479928334554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,0,1,fp8,fp8,0,0.7252373695373535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,0,1,float16,fp8,0,0.6876533031463623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,0,1,float16,float16,0,0.365882674853007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,128,1,float16,fp8,0,0.36443201700846356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,128,1,fp8,fp8,0,0.37303467591603595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,0,1,fp8,fp8,0,0.3665813207626343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,0,1,float16,fp8,0,0.36207465330759686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,128,1,float16,float16,0,0.3575733502705892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,0,1,float16,float16,0,0.35051735242207843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,128,1,float16,fp8,0,0.3551413218180339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,128,1,fp8,fp8,0,0.3321066697438558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,0,1,float16,fp8,0,0.34811198711395264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,0,1,fp8,fp8,0,0.3221813241640727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,128,1,float16,float16,0,0.3627093235651652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,0,1,float16,float16,0,0.35623466968536377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,128,1,float16,fp8,0,0.360207994778951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,128,1,fp8,fp8,0,0.33607999483744305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,0,1,float16,fp8,0,0.3535093466440837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,0,1,fp8,fp8,0,0.32779733339945477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,128,1,float16,float16,0,0.3696800072987874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,0,1,float16,float16,0,0.36100268363952637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,128,1,float16,fp8,0,0.3657706578572591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,128,1,fp8,fp8,0,0.36081600189208984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,0,1,float16,fp8,0,0.3585493167241414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,0,1,fp8,fp8,0,0.3524746497472127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,128,1,float16,float16,0,0.36181334654490155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,0,1,float16,float16,0,0.35434667269388836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,128,1,float16,fp8,0,0.35569600264231366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,128,1,fp8,fp8,0,0.35994664827982586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,0,1,float16,fp8,0,0.35123733679453534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,0,1,fp8,fp8,0,0.35818668206532794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,128,1,float16,float16,0,0.19462400674819946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,0,1,float16,float16,0,0.19128000736236572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,128,1,float16,fp8,0,0.1935946742693583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,0,1,float16,fp8,0,0.1885813275973002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,128,1,fp8,fp8,0,0.19639466206232706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,0,1,fp8,fp8,0,0.19170133272806802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,128,1,float16,float16,0,0.1880426605542501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,0,1,float16,float16,0,0.18311999241511026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,128,1,float16,fp8,0,0.18553600708643594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,128,1,fp8,fp8,0,0.17494400342305502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,0,1,float16,fp8,0,0.18227734168370566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,0,1,fp8,fp8,0,0.1707520087560018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,128,1,float16,float16,0,0.1909280021985372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,0,1,float16,float16,0,0.18660799662272134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,128,1,float16,fp8,0,0.18982932964960733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,128,1,fp8,fp8,0,0.17805866400400797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,0,1,float16,fp8,0,0.18530666828155518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,0,1,fp8,fp8,0,0.17294933398564658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,0,1,float16,float16,0,0.19031999508539835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,128,1,float16,float16,0,0.1946613391240438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,128,1,float16,fp8,0,0.1937546730041504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,128,1,fp8,fp8,0,0.18860799074172974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,0,1,float16,fp8,0,0.18738667170206705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,0,1,fp8,fp8,0,0.18419732650121054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,128,1,float16,float16,0,0.19043733676274618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,0,1,float16,float16,0,0.18523732821146646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,128,1,float16,fp8,0,0.18802666664123535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,128,1,fp8,fp8,0,0.18806399901707968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,0,1,float16,fp8,0,0.1846133271853129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,0,1,fp8,fp8,0,0.18199467658996582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,128,1,float16,float16,0,0.1079253355662028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,0,1,float16,float16,0,0.10755733648935954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,128,1,float16,fp8,0,0.10556800166765849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,128,1,fp8,fp8,0,0.10797866185506184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,0,1,float16,fp8,0,0.1051093339920044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,0,1,fp8,fp8,0,0.1065120001633962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,128,1,float16,float16,0,0.0995360016822815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,0,1,float16,float16,0,0.09750933448473613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,128,1,float16,fp8,0,0.09907199939092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,128,1,fp8,fp8,0,0.09489599863688152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,0,1,float16,fp8,0,0.09711999694506328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,0,1,fp8,fp8,0,0.09242666761080424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,128,1,float16,float16,0,0.10140267014503479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,0,1,float16,float16,0,0.09900800387064616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,128,1,float16,fp8,0,0.10104533036549886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,128,1,fp8,fp8,0,0.09510933359464009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,0,1,float16,fp8,0,0.09820800026257832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,0,1,fp8,fp8,0,0.09462400277455647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,128,1,float16,float16,0,0.10337066650390625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,0,1,float16,float16,0,0.09941333532333374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,128,1,float16,fp8,0,0.10334933797518413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,128,1,fp8,fp8,0,0.1011253297328949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,0,1,float16,fp8,0,0.10098666946093242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,0,1,fp8,fp8,0,0.09839466214179993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,128,1,float16,float16,0,0.10178132851918538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,0,1,float16,float16,0,0.0990666647752126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,128,1,float16,fp8,0,0.1013813316822052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,0,1,float16,fp8,0,0.09918933113416036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,128,1,fp8,fp8,0,0.10224533081054688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,0,1,fp8,fp8,0,0.10014933347702026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,128,1,float16,float16,0,0.06087466577688853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,0,1,float16,float16,0,0.0589279979467392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,128,1,float16,fp8,0,0.05969599882761637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,0,1,float16,fp8,0,0.056741332014401756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,128,1,fp8,fp8,0,0.06406400104363759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,0,1,fp8,fp8,0,0.06006399790445963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,128,1,float16,float16,0,0.057989334066708885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,128,1,float16,fp8,0,0.058149332801500954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,0,1,float16,float16,0,0.05827199916044871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,128,1,fp8,fp8,0,0.05477866530418396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,0,1,float16,fp8,0,0.05793066819508871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,0,1,fp8,fp8,0,0.05411200225353241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,128,1,float16,float16,0,0.05808533231417338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,0,1,float16,float16,0,0.05753066639105479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,128,1,float16,fp8,0,0.05811200042565664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,128,1,fp8,fp8,0,0.05611733098824819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,0,1,float16,fp8,0,0.05691733459631602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,0,1,fp8,fp8,0,0.05606933434804281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,128,1,float16,float16,0,0.06032533446947733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,0,1,float16,float16,0,0.05826666454474131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,128,1,float16,fp8,0,0.06027733286221822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,128,1,fp8,fp8,0,0.05843733251094818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,0,1,float16,fp8,0,0.05831466615200043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,0,1,fp8,fp8,0,0.057429333527882896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,128,1,float16,float16,0,0.05832533538341522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,128,1,float16,fp8,0,0.058602665861447654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,0,1,float16,float16,0,0.05629866818586985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,128,1,fp8,fp8,0,0.058229332168896995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,0,1,float16,fp8,0,0.056517332792282104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,0,1,fp8,fp8,0,0.05625066657861074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,128,1,float16,float16,0,0.037605332831541695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,0,1,float16,float16,0,0.0360000009338061
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,128,1,float16,fp8,0,0.03752533346414566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,128,1,fp8,fp8,0,0.03938133269548416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,0,1,float16,fp8,0,0.0369759996732076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,0,1,fp8,fp8,0,0.037392000357309975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,128,1,float16,float16,0,0.03770133356253306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,0,1,float16,float16,0,0.03772799919048945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,128,1,float16,fp8,0,0.03749866783618927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,128,1,fp8,fp8,0,0.03569599986076355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,0,1,float16,fp8,0,0.037615999579429626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,0,1,fp8,fp8,0,0.03568000098069509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,128,1,float16,float16,0,0.03728000074625015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,0,1,float16,float16,0,0.03749866783618927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,128,1,float16,fp8,0,0.037834666669368744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,128,1,fp8,fp8,0,0.03583466758330663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,0,1,float16,fp8,0,0.03762666632731756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,0,1,fp8,fp8,0,0.0354720006386439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,128,1,float16,float16,0,0.03736533224582672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,128,1,float16,fp8,0,0.037920000652472176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,0,1,float16,float16,0,0.03731200098991394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,128,1,fp8,fp8,0,0.037445334096749626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,0,1,float16,fp8,0,0.03766400118668874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,0,1,fp8,fp8,0,0.03692800054947535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,128,1,float16,float16,0,0.037477334340413414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,0,1,float16,float16,0,0.03763733307520548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,128,1,float16,fp8,0,0.037658666570981346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,128,1,fp8,fp8,0,0.03807466725508372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,0,1,float16,fp8,0,0.037621334195137024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,0,1,fp8,fp8,0,0.037018666664759316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,128,1,float16,float16,0,0.025445332129796345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,0,1,float16,float16,0,0.025279998779296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,128,1,float16,fp8,0,0.025978667040665943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,128,1,fp8,fp8,0,0.02526933451493581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,0,1,fp8,fp8,0,0.025610665480295818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,0,1,float16,fp8,0,0.025722667574882507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,128,1,float16,float16,0,0.025242666403452556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,128,1,float16,fp8,0,0.02497600018978119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,0,1,float16,fp8,0,0.023498666783173878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,0,1,fp8,fp8,0,0.02314666658639908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,128,1,fp8,fp8,0,0.024442667762438457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,128,1,float16,float16,0,0.025263999899228413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,0,1,float16,float16,0,0.024373332659403484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,128,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,128,1,fp8,fp8,0,0.025114665428797405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,0,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,0,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,128,1,float16,float16,0,0.025386666258176167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,0,1,float16,float16,0,0.02566933383544286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,128,1,float16,fp8,0,0.025221332907676697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,128,1,fp8,fp8,0,0.025418666501839954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,0,1,float16,fp8,0,0.025685332715511322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,0,1,fp8,fp8,0,0.025093334416548412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,128,1,float16,float16,0,0.02535466601451238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,0,1,float16,float16,0,0.025461333493391674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,128,1,float16,fp8,0,0.025248001019159954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,128,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,0,1,fp8,fp8,0,0.02537599951028824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,0,1,float16,fp8,0,0.02532266577084859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,128,1,float16,float16,0,0.021210665504137676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,0,1,float16,float16,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,128,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,128,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,0,1,float16,fp8,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,0,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,128,1,float16,float16,0,0.021509334444999695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,0,1,float16,float16,0,0.02125866711139679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,128,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,128,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,0,1,float16,fp8,0,0.0205226664741834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,0,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,128,1,float16,float16,0,0.021301334102948506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,0,1,float16,float16,0,0.019861333072185516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,128,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,128,1,fp8,fp8,0,0.02077866718173027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,0,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,128,1,float16,float16,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,0,1,float16,float16,0,0.020266667008399963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,128,1,float16,fp8,0,0.021418665846188862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,128,1,fp8,fp8,0,0.020773333807786305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,0,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,0,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,128,1,float16,float16,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,0,1,float16,float16,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,128,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,128,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,0,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,0,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,128,1,float16,float16,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,0,1,float16,float16,0,0.019493332753578823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,128,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,128,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,0,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,128,1,float16,float16,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,0,1,float16,float16,0,0.01893866683046023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,128,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,128,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,0,1,float16,fp8,0,0.01941866676012675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,0,1,fp8,fp8,0,0.018837332725524902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,128,1,float16,float16,0,0.019173332800467808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,0,1,float16,float16,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,128,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,128,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,0,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,0,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,128,1,float16,float16,0,0.020970667401949566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,128,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,128,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,0,1,fp8,fp8,0,0.01947733387351036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,128,1,float16,float16,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,0,1,float16,float16,0,0.01904533306757609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,128,1,float16,fp8,0,0.020954666038354237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,128,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,0,1,float16,fp8,0,0.020928000410397846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,0,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,128,1,float16,float16,0,0.3744853337605794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,0,1,float16,float16,0,0.3737866481145223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,128,1,float16,fp8,0,0.37062935034434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,128,1,fp8,fp8,0,0.35539201895395917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,0,1,float16,fp8,0,0.37160531679789227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,0,1,fp8,fp8,0,0.35578131675720215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,128,1,float16,float16,0,0.38018667697906494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,0,1,float16,float16,0,0.3788693348566691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,128,1,fp8,fp8,0,0.34654398759206134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,128,1,float16,fp8,0,0.3787093162536621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,0,1,fp8,fp8,0,0.3464053471883138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,0,1,float16,fp8,0,0.37829331556955975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,128,1,float16,float16,0,0.3888853391011556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,0,1,float16,float16,0,0.38978131612141925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,128,1,float16,fp8,0,0.38178133964538574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,128,1,fp8,fp8,0,0.3747093280156453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,0,1,float16,fp8,0,0.38224534193674725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,0,1,fp8,fp8,0,0.3791946570078532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,0,1,float16,float16,0,0.3777279853820801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,128,1,float16,float16,0,0.3774506648381551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,128,1,float16,fp8,0,0.3727253278096517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,128,1,fp8,fp8,0,0.38284798463185626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,128,1,float16,float16,0,0.202239990234375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,0,1,float16,fp8,0,0.3722933530807495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,0,1,fp8,fp8,0,0.37698666254679364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,0,1,float16,float16,0,0.20298133293787637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,128,1,float16,fp8,0,0.20189332962036133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,128,1,fp8,fp8,0,0.20198933283487955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,0,1,float16,fp8,0,0.2014133334159851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,0,1,fp8,fp8,0,0.20124799013137817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,128,1,float16,float16,0,0.19371734062830606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,0,1,float16,float16,0,0.1936053236325582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,128,1,float16,fp8,0,0.19246933857599893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,128,1,fp8,fp8,0,0.182586669921875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,0,1,float16,fp8,0,0.1927893360455831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,0,1,fp8,fp8,0,0.18239466349283853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,128,1,float16,float16,0,0.1958293318748474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,0,1,float16,float16,0,0.19589332739512125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,128,1,fp8,fp8,0,0.18041066328684488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,128,1,float16,fp8,0,0.19554666678110758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,0,1,float16,fp8,0,0.1941653291384379
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,0,1,fp8,fp8,0,0.18150399128595987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,128,1,float16,float16,0,0.20171733697255453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,128,1,float16,fp8,0,0.200272003809611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,0,1,float16,float16,0,0.20173333088556925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,128,1,fp8,fp8,0,0.19326933224995932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,0,1,float16,fp8,0,0.19998933871587118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,0,1,fp8,fp8,0,0.19407999515533447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,128,1,float16,float16,0,0.19771732886632284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,0,1,float16,float16,0,0.19598400592803955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,128,1,float16,fp8,0,0.1935840050379435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,128,1,fp8,fp8,0,0.19226133823394775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,128,1,float16,float16,0,0.11091199517250061
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,0,1,float16,fp8,0,0.19512534141540527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,0,1,fp8,fp8,0,0.1920106609662374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,0,1,float16,float16,0,0.11103999614715576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,128,1,float16,fp8,0,0.11100799838701884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,128,1,fp8,fp8,0,0.11186133821805318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,0,1,float16,fp8,0,0.11046399672826131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,0,1,fp8,fp8,0,0.11161067088445027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,128,1,float16,float16,0,0.10479467113812764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,0,1,float16,float16,0,0.10518399874369304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,128,1,float16,fp8,0,0.10504532853762309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,128,1,fp8,fp8,0,0.09917866190274556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,0,1,float16,fp8,0,0.10434666275978088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,0,1,fp8,fp8,0,0.09733333190282185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,128,1,float16,float16,0,0.10587732990582784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,0,1,float16,float16,0,0.10520000259081523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,128,1,float16,fp8,0,0.1053013304869334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,128,1,fp8,fp8,0,0.09733866651852925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,0,1,float16,fp8,0,0.10521066188812256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,0,1,fp8,fp8,0,0.09719467163085938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,128,1,float16,float16,0,0.10756267110506694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,0,1,float16,float16,0,0.10754666725794475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,128,1,float16,fp8,0,0.1069546639919281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,128,1,fp8,fp8,0,0.10392000277837117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,0,1,float16,fp8,0,0.10717866818110149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,0,1,fp8,fp8,0,0.10380799571673076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,128,1,float16,float16,0,0.10739733775456746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,0,1,float16,float16,0,0.107205331325531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,128,1,float16,fp8,0,0.10514666636784871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,128,1,fp8,fp8,0,0.10643733541170756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,0,1,float16,fp8,0,0.10642666618029277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,0,1,fp8,fp8,0,0.10566400488217671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,128,1,float16,float16,0,0.0602453351020813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,0,1,float16,float16,0,0.06000000238418579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,128,1,float16,fp8,0,0.06026133398214976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,128,1,fp8,fp8,0,0.06252266466617584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,0,1,float16,fp8,0,0.05997333427270254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,0,1,fp8,fp8,0,0.06198399762312571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,128,1,float16,float16,0,0.058373332023620605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,0,1,float16,float16,0,0.05811200042565664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,128,1,float16,fp8,0,0.05808533231417338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,0,1,float16,fp8,0,0.05792533357938131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,128,1,fp8,fp8,0,0.054986665646235146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,0,1,fp8,fp8,0,0.05516799787680308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,128,1,float16,float16,0,0.058229332168896995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,0,1,float16,float16,0,0.058362667759259544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,128,1,float16,fp8,0,0.05797333518664042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,128,1,fp8,fp8,0,0.05593066910902659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,0,1,float16,fp8,0,0.058186665177345276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,0,1,fp8,fp8,0,0.056133334835370384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,128,1,float16,float16,0,0.06010133524735769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,0,1,float16,float16,0,0.05994666616121928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,128,1,float16,fp8,0,0.05994133154551188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,128,1,fp8,fp8,0,0.05842133363087972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,0,1,float16,fp8,0,0.05990933378537496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,0,1,fp8,fp8,0,0.05819733440876007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,128,1,float16,float16,0,0.06006399790445963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,0,1,float16,float16,0,0.0583840012550354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,128,1,float16,fp8,0,0.058229332168896995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,128,1,fp8,fp8,0,0.0584853341182073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,0,1,float16,fp8,0,0.05901333192984263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,128,1,float16,float16,0,0.03969600051641464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,0,1,fp8,fp8,0,0.05808533231417338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,0,1,float16,float16,0,0.03979199876387914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,128,1,float16,fp8,0,0.03938133269548416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,128,1,fp8,fp8,0,0.03966933240493139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,0,1,float16,fp8,0,0.03862400104602178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,0,1,fp8,fp8,0,0.039701332648595176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,128,1,float16,float16,0,0.03764266769091288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,0,1,float16,float16,0,0.037808001041412354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,128,1,float16,fp8,0,0.0378506655494372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,128,1,fp8,fp8,0,0.037605332831541695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,0,1,float16,fp8,0,0.0393653338154157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,0,1,fp8,fp8,0,0.037605332831541695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,128,1,float16,float16,0,0.03775466730197271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,0,1,float16,float16,0,0.03992533435424169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,128,1,float16,fp8,0,0.0395413339138031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,128,1,fp8,fp8,0,0.037717332442601524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,0,1,float16,fp8,0,0.03961066653331121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,0,1,fp8,fp8,0,0.037717332442601524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,128,1,float16,float16,0,0.0400693342089653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,0,1,float16,float16,0,0.039674667020638786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,128,1,float16,fp8,0,0.03968533376852671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,128,1,fp8,fp8,0,0.037733333806196846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,0,1,float16,fp8,0,0.03956799954175949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,0,1,fp8,fp8,0,0.03774933268626531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,128,1,float16,float16,0,0.03868266691764196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,0,1,float16,float16,0,0.03881600002447764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,128,1,float16,fp8,0,0.03834133346875509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,128,1,fp8,fp8,0,0.037871999045213066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,0,1,float16,fp8,0,0.03829866647720337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,0,1,fp8,fp8,0,0.037845333417256675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,128,1,float16,float16,0,0.025946666797002155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,0,1,float16,float16,0,0.025605333348115284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,128,1,float16,fp8,0,0.025578667720158894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,128,1,fp8,fp8,0,0.027376001079877216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,0,1,float16,fp8,0,0.02756800005833308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,0,1,fp8,fp8,0,0.027301333844661713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,128,1,float16,float16,0,0.025583999852339428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,0,1,float16,float16,0,0.025370667378107708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,128,1,float16,fp8,0,0.025237334271272022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,128,1,fp8,fp8,0,0.025706666211287182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,0,1,float16,fp8,0,0.02531733363866806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,0,1,fp8,fp8,0,0.02513599892457326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,128,1,float16,float16,0,0.02698666602373123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,0,1,float16,float16,0,0.02555199960867564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,128,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,128,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,0,1,float16,fp8,0,0.025616000096003216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,0,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,128,1,float16,float16,0,0.025568000972270966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,0,1,float16,float16,0,0.025311999022960663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,128,1,float16,fp8,0,0.02757333219051361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,128,1,fp8,fp8,0,0.025418666501839954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,0,1,float16,fp8,0,0.02736533433198929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,0,1,fp8,fp8,0,0.027285332481066387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,128,1,float16,float16,0,0.025557334224383037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,0,1,float16,float16,0,0.025797332326571148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,128,1,float16,fp8,0,0.027295999228954315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,128,1,fp8,fp8,0,0.027295999228954315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,0,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,0,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,128,1,float16,float16,0,0.01942933350801468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,128,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,0,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,128,1,fp8,fp8,0,0.019461333751678467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,128,1,float16,float16,0,0.018746666610240936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,0,1,float16,float16,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,128,1,float16,fp8,0,0.019093333433071773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,128,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,0,1,float16,float16,0,0.019658666104078293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,128,1,float16,float16,0,0.019578666736682255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,128,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,0,1,float16,fp8,0,0.019440000255902607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,128,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,0,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,128,1,float16,float16,0,0.01940800001223882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,0,1,float16,float16,0,0.019440000255902607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,128,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,128,1,fp8,fp8,0,0.018842666099468868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,0,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,0,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,128,1,float16,float16,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,128,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,128,1,float16,float16,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,128,1,float16,fp8,0,0.017711999515692394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,0,1,float16,fp8,0,0.01869333287080129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,0,1,fp8,fp8,0,0.018810667097568512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,0,1,float16,float16,0,0.017642666896184284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,128,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,128,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,0,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,0,1,float16,float16,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,128,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,128,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,0,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,128,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,0,1,float16,float16,0,0.017349333812793095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,128,1,float16,fp8,0,0.018645333747069042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,128,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,0,1,float16,fp8,0,0.01829333355029424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,128,1,float16,float16,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,128,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,128,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,0,1,float16,fp8,0,0.01815466706951459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,128,1,float16,float16,0,0.016810666769742966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,128,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,128,1,float16,float16,0,0.015397333850463232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,128,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,0,1,float16,fp8,0,0.017530667285124462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,128,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,0,1,float16,float16,0,0.01749333366751671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,0,1,fp8,fp8,0,0.015802666544914246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,128,1,fp8,fp8,0,0.015615999698638916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,0,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,128,1,float16,float16,0,0.2651466727256775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,0,1,float16,float16,0,0.2642293373743693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,128,1,float16,fp8,0,0.2627039949099223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,128,1,fp8,fp8,0,0.24782933791478476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,0,1,float16,fp8,0,0.2627039949099223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,0,1,fp8,fp8,0,0.24742400646209717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,128,1,float16,float16,0,0.2648533384005229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,0,1,float16,float16,0,0.2651626666386922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,128,1,float16,fp8,0,0.26428266366322833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,128,1,fp8,fp8,0,0.24284799893697104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,0,1,float16,fp8,0,0.26339733600616455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,0,1,fp8,fp8,0,0.24342934290568033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,128,1,float16,float16,0,0.26898666222890216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,0,1,float16,float16,0,0.27012266715367633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,128,1,float16,fp8,0,0.265882670879364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,128,1,fp8,fp8,0,0.254805326461792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,0,1,float16,fp8,0,0.2651519974072774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,0,1,fp8,fp8,0,0.25516800085703534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,128,1,float16,float16,0,0.26309865713119507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,0,1,float16,float16,0,0.26418666044871014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,128,1,float16,fp8,0,0.26361600557963055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,128,1,fp8,fp8,0,0.2535093426704407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,128,1,float16,float16,0,0.14481066664059958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,0,1,float16,fp8,0,0.2638079921404521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,0,1,fp8,fp8,0,0.25378666321436566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,0,1,float16,float16,0,0.14550933241844177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,128,1,float16,fp8,0,0.14452800154685974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,128,1,fp8,fp8,0,0.14352533221244812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,0,1,float16,fp8,0,0.14596266547838846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,0,1,fp8,fp8,0,0.14076266686121622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,128,1,float16,float16,0,0.13915733496348062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,0,1,float16,float16,0,0.1383680005868276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,128,1,float16,fp8,0,0.13826133807500204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,128,1,fp8,fp8,0,0.12786666552225748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,0,1,float16,fp8,0,0.1376426617304484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,0,1,fp8,fp8,0,0.12981866796811423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,128,1,float16,float16,0,0.1402239998181661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,0,1,float16,float16,0,0.14004266262054443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,128,1,float16,fp8,0,0.13799466689427695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,128,1,fp8,fp8,0,0.1302239994208018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,0,1,float16,fp8,0,0.13801067074139914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,0,1,fp8,fp8,0,0.12827733159065247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,128,1,float16,float16,0,0.14033599694569907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,0,1,float16,float16,0,0.1420693298180898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,128,1,float16,fp8,0,0.14044266939163208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,128,1,fp8,fp8,0,0.13387200236320496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,0,1,float16,fp8,0,0.14123200376828512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,0,1,fp8,fp8,0,0.1351626714070638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,128,1,float16,float16,0,0.13863466183344522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,0,1,float16,float16,0,0.1399946709473928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,128,1,fp8,fp8,0,0.1377173364162445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,0,1,float16,fp8,0,0.1406719982624054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,128,1,float16,fp8,0,0.14078933000564575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,0,1,fp8,fp8,0,0.13739200433095297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,128,1,float16,float16,0,0.07875733574231465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,0,1,float16,float16,0,0.07872533301512401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,128,1,float16,fp8,0,0.07914133369922638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,128,1,fp8,fp8,0,0.0784800002972285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,0,1,float16,fp8,0,0.07854400078455608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,0,1,fp8,fp8,0,0.07850666840871175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,128,1,float16,float16,0,0.07691733539104462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,0,1,float16,float16,0,0.07607999940713246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,128,1,float16,fp8,0,0.07652799785137177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,0,1,float16,fp8,0,0.07483733197053273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,128,1,fp8,fp8,0,0.0710453341404597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,0,1,fp8,fp8,0,0.07053333520889282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,128,1,float16,float16,0,0.0768746683994929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,128,1,float16,fp8,0,0.07650133470694225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,0,1,float16,float16,0,0.07717333237330119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,128,1,fp8,fp8,0,0.07233599821726482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,0,1,float16,fp8,0,0.07683733105659485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,0,1,fp8,fp8,0,0.07302399973074596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,128,1,float16,float16,0,0.07857066889603932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,0,1,float16,float16,0,0.07659199833869934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,128,1,float16,fp8,0,0.07809600234031677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,128,1,fp8,fp8,0,0.07470400134722392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,0,1,float16,fp8,0,0.07817066709200542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,0,1,fp8,fp8,0,0.07499733567237854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,128,1,float16,float16,0,0.07663999994595845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,0,1,float16,float16,0,0.07673599819342296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,128,1,float16,fp8,0,0.07680533329645793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,128,1,fp8,fp8,0,0.07445866862932841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,0,1,float16,fp8,0,0.07646400233109792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,128,1,float16,float16,0,0.04598933458328247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,0,1,fp8,fp8,0,0.07479999959468842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,0,1,float16,float16,0,0.04595200220743815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,128,1,float16,fp8,0,0.04604266583919525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,128,1,fp8,fp8,0,0.04663999875386556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,0,1,float16,fp8,0,0.04794133206208547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,0,1,fp8,fp8,0,0.04633066554864248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,0,1,float16,float16,0,0.04598933458328247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,128,1,float16,float16,0,0.04620266457398733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,128,1,float16,fp8,0,0.046181331078211464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,128,1,fp8,fp8,0,0.04408533374468485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,0,1,float16,fp8,0,0.04585599899291992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,0,1,fp8,fp8,0,0.04375466704368591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,128,1,float16,float16,0,0.047770669062932335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,0,1,float16,float16,0,0.04633066554864248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,128,1,float16,fp8,0,0.046015997727712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,128,1,fp8,fp8,0,0.04389866689840952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,0,1,float16,fp8,0,0.04608533283074697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,0,1,fp8,fp8,0,0.043866669138272606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,128,1,float16,float16,0,0.045893331368764244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,0,1,float16,float16,0,0.04611733555793762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,128,1,float16,fp8,0,0.04757333298524221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,128,1,fp8,fp8,0,0.045738667249679565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,0,1,float16,fp8,0,0.04804266492525736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,0,1,fp8,fp8,0,0.04613333443800608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,128,1,float16,float16,0,0.04614399870236715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,0,1,float16,float16,0,0.04743466774622599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,128,1,float16,fp8,0,0.04771199822425842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,128,1,fp8,fp8,0,0.045978665351867676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,0,1,float16,fp8,0,0.0472320020198822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,0,1,fp8,fp8,0,0.045882667104403176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,128,1,float16,float16,0,0.031328000128269196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,0,1,float16,float16,0,0.03161599983771642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,128,1,float16,fp8,0,0.03170666595300039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,128,1,fp8,fp8,0,0.031328000128269196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,0,1,float16,fp8,0,0.031125334401925404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,0,1,fp8,fp8,0,0.03046933313210805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,128,1,float16,float16,0,0.03143466760714849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,0,1,float16,float16,0,0.0296426663796107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,128,1,float16,fp8,0,0.029525332152843475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,128,1,fp8,fp8,0,0.029605334003766377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,0,1,float16,fp8,0,0.030437332888444264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,0,1,fp8,fp8,0,0.029696000119050343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,128,1,float16,float16,0,0.03140799949566523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,0,1,float16,float16,0,0.03151999910672506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,128,1,float16,fp8,0,0.03145600110292435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,128,1,fp8,fp8,0,0.03136533250411352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,0,1,float16,fp8,0,0.03176533430814743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,0,1,fp8,fp8,0,0.02958933264017105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,128,1,float16,float16,0,0.03166933357715607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,0,1,float16,float16,0,0.031888000667095184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,128,1,float16,fp8,0,0.03136000037193298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,128,1,fp8,fp8,0,0.02959999938805898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,0,1,float16,fp8,0,0.03153600047032038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,0,1,fp8,fp8,0,0.02951466788848241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,128,1,float16,float16,0,0.03138133386770884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,0,1,float16,float16,0,0.031583999594052635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,128,1,float16,fp8,0,0.03107200066248576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,128,1,fp8,fp8,0,0.029850666721661884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,0,1,float16,fp8,0,0.03151999910672506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,0,1,fp8,fp8,0,0.03147733211517334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,128,1,float16,float16,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,0,1,float16,float16,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,128,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,128,1,fp8,fp8,0,0.023434666295846302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,128,1,float16,float16,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,0,1,fp8,fp8,0,0.02362666775782903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,0,1,float16,float16,0,0.021370666722456615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,128,1,float16,fp8,0,0.0220320001244545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,128,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,0,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,128,1,float16,float16,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,0,1,float16,float16,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,128,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,128,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,0,1,float16,fp8,0,0.021669333179791767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,0,1,fp8,fp8,0,0.021365332106749218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,128,1,float16,float16,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,0,1,float16,float16,0,0.02199999988079071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,128,1,float16,fp8,0,0.02145066608985265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,128,1,fp8,fp8,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,0,1,fp8,fp8,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,0,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,128,1,float16,float16,0,0.021322667598724365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,0,1,float16,float16,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,128,1,fp8,fp8,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,128,1,float16,fp8,0,0.022426667312781017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,0,1,fp8,fp8,0,0.021189334491888683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,128,1,float16,float16,0,0.017786666750907898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,128,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,128,1,fp8,fp8,0,0.017525333911180496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,128,1,float16,float16,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,128,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,128,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,0,1,fp8,fp8,0,0.017594666530688603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,128,1,float16,float16,0,0.0174346665541331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,128,1,float16,fp8,0,0.018383999665578205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,128,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,128,1,float16,fp8,0,0.01754666616519292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,128,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,0,1,float16,fp8,0,0.017445333302021027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,128,1,float16,float16,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,0,1,float16,fp8,0,0.018895999838908512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,0,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,128,1,float16,float16,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,0,1,float16,float16,0,0.01749333366751671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,128,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,0,1,float16,fp8,0,0.01754133279124896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,128,1,float16,float16,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,128,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,0,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,128,1,float16,float16,0,0.01624533285697301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,0,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,0,1,float16,float16,0,0.017423999806245167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,0,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,128,1,float16,float16,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,128,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,128,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,128,1,float16,float16,0,0.016623999923467636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,128,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,0,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,128,1,float16,float16,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,128,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,128,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,128,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,0,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,128,1,float16,float16,0,0.015919999529918034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,128,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,128,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,0,1,fp8,fp8,0,0.01584533353646596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,128,1,float16,float16,0,0.016224000602960587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,0,1,fp8,fp8,0,0.016186666985352833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,128,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,0,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,128,1,float16,float16,0,0.20996799071629843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,0,1,float16,float16,0,0.20990399519602457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,128,1,float16,fp8,0,0.20984532435735068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,128,1,fp8,fp8,0,0.19142399231592813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,0,1,float16,fp8,0,0.20992000897725424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,0,1,fp8,fp8,0,0.19282132387161255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,128,1,float16,float16,0,0.21223467588424683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,0,1,float16,float16,0,0.21182399988174438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,128,1,fp8,fp8,0,0.1914506753285726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,128,1,float16,fp8,0,0.21057599782943726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,0,1,float16,fp8,0,0.20988800128300986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,0,1,fp8,fp8,0,0.1907306710879008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,128,1,float16,float16,0,0.21363200743993124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,0,1,float16,float16,0,0.21215466658274332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,128,1,float16,fp8,0,0.2119040091832479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,128,1,fp8,fp8,0,0.19751467307408652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,0,1,float16,fp8,0,0.2119413415590922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,0,1,fp8,fp8,0,0.1965493361155192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,128,1,float16,float16,0,0.21076265970865884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,0,1,float16,float16,0,0.21005332469940186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,128,1,float16,fp8,0,0.2103253404299418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,0,1,float16,fp8,0,0.2114773392677307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,128,1,fp8,fp8,0,0.19857599337895712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,0,1,fp8,fp8,0,0.1991999944051107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,128,1,float16,float16,0,0.11341866850852966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,0,1,float16,float16,0,0.11178666353225708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,128,1,float16,fp8,0,0.11294399698575337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,128,1,fp8,fp8,0,0.107205331325531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,0,1,float16,fp8,0,0.11218667030334473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,0,1,fp8,fp8,0,0.10936533411343892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,128,1,float16,float16,0,0.10963732997576396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,0,1,float16,float16,0,0.11162666479746501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,128,1,float16,fp8,0,0.11096533139546712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,128,1,fp8,fp8,0,0.10122666756312053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,0,1,float16,fp8,0,0.10983999570210774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,0,1,fp8,fp8,0,0.10130666693051656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,128,1,float16,float16,0,0.10944533348083496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,0,1,float16,float16,0,0.10977600018183391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,128,1,float16,fp8,0,0.10983999570210774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,128,1,fp8,fp8,0,0.1011306643486023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,0,1,float16,fp8,0,0.10966400305430095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,0,1,fp8,fp8,0,0.10107200344403584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,128,1,float16,float16,0,0.11179199814796448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,0,1,float16,float16,0,0.11178132891654968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,128,1,float16,fp8,0,0.11160533626874287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,128,1,fp8,fp8,0,0.10316800077756245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,0,1,float16,fp8,0,0.10972799857457478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,0,1,fp8,fp8,0,0.10495466987291972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,128,1,float16,float16,0,0.11173333724339803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,0,1,float16,float16,0,0.11155733466148376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,128,1,float16,fp8,0,0.10940266648928325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,128,1,fp8,fp8,0,0.1053493320941925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,0,1,float16,fp8,0,0.1113973359266917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,0,1,fp8,fp8,0,0.10521599650382996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,128,1,float16,float16,0,0.06442666550477345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,0,1,float16,float16,0,0.06515733400980632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,128,1,float16,fp8,0,0.06446933249632518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,128,1,fp8,fp8,0,0.062074666221936546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,0,1,float16,fp8,0,0.0643039991458257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,0,1,fp8,fp8,0,0.062352001667022705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,128,1,float16,float16,0,0.06412800153096516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,0,1,float16,float16,0,0.06414400041103363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,128,1,float16,fp8,0,0.06428800026575725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,128,1,fp8,fp8,0,0.06004266440868378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,0,1,float16,fp8,0,0.06429333488146464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,0,1,fp8,fp8,0,0.060458665092786155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,128,1,float16,float16,0,0.0644053320089976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,0,1,float16,float16,0,0.06457066535949707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,128,1,float16,fp8,0,0.06428266565004985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,128,1,fp8,fp8,0,0.059631998340288796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,0,1,float16,fp8,0,0.06445866823196411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,0,1,fp8,fp8,0,0.060266668597857155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,128,1,float16,float16,0,0.06453866759936015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,0,1,float16,float16,0,0.06433066725730896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,128,1,float16,fp8,0,0.06449066599210103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,128,1,fp8,fp8,0,0.06097066899140676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,0,1,float16,fp8,0,0.06443733473618825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,0,1,fp8,fp8,0,0.06228800117969513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,128,1,float16,float16,0,0.06438399851322174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,0,1,float16,float16,0,0.06442666550477345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,128,1,float16,fp8,0,0.06458666423956554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,0,1,float16,fp8,0,0.06445866823196411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,128,1,fp8,fp8,0,0.06181866427262624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,128,1,float16,float16,0,0.039605334401130676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,0,1,fp8,fp8,0,0.06190933287143707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,0,1,float16,float16,0,0.03811199963092804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,128,1,float16,fp8,0,0.038015998899936676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,128,1,fp8,fp8,0,0.03772266705830892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,0,1,float16,fp8,0,0.03957333415746689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,0,1,fp8,fp8,0,0.03749333322048187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,128,1,float16,float16,0,0.037530665596326195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,0,1,float16,float16,0,0.037808001041412354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,128,1,float16,fp8,0,0.03930133332808813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,128,1,fp8,fp8,0,0.03568000098069509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,0,1,float16,fp8,0,0.03751466671625773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,0,1,fp8,fp8,0,0.03562133262554804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,128,1,float16,float16,0,0.03946666667858759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,0,1,float16,float16,0,0.037621334195137024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,128,1,float16,fp8,0,0.03989866624275843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,128,1,fp8,fp8,0,0.03782399992148081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,0,1,float16,fp8,0,0.03957866628964742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,0,1,fp8,fp8,0,0.03758399933576584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,128,1,float16,float16,0,0.03942933430274328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,0,1,float16,float16,0,0.03979733337958654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,128,1,float16,fp8,0,0.03833599885304769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,128,1,fp8,fp8,0,0.0376800000667572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,0,1,float16,fp8,0,0.039461334546407066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,0,1,fp8,fp8,0,0.03766933331886927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,128,1,float16,float16,0,0.037791999677817024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,0,1,float16,float16,0,0.03956799954175949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,128,1,float16,fp8,0,0.03782399992148081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,128,1,fp8,fp8,0,0.03741333385308584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,0,1,float16,fp8,0,0.03760000069936117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,0,1,fp8,fp8,0,0.03772799919048945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,128,1,float16,float16,0,0.02720000098148982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,0,1,float16,float16,0,0.027301333844661713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,128,1,float16,fp8,0,0.027765333652496338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,128,1,fp8,fp8,0,0.027386667827765148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,0,1,float16,fp8,0,0.027610667049884796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,0,1,fp8,fp8,0,0.02734400083621343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,128,1,float16,float16,0,0.027130665878454845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,0,1,float16,float16,0,0.025519999365011852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,128,1,float16,fp8,0,0.027621333797772724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,128,1,fp8,fp8,0,0.02518933266401291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,0,1,float16,fp8,0,0.02762666592995326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,0,1,fp8,fp8,0,0.02613866577545802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,128,1,float16,float16,0,0.02604266752799352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,0,1,float16,float16,0,0.027386667827765148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,128,1,float16,fp8,0,0.027130665878454845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,128,1,fp8,fp8,0,0.0266239990790685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,0,1,float16,fp8,0,0.02624000112215678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,0,1,fp8,fp8,0,0.02548266698916753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,128,1,float16,float16,0,0.027269333600997925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,0,1,float16,float16,0,0.025472000241279602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,128,1,fp8,fp8,0,0.02737066646416982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,128,1,float16,fp8,0,0.026863999664783478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,0,1,float16,fp8,0,0.027482666075229645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,0,1,fp8,fp8,0,0.027477333943049114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,128,1,float16,float16,0,0.02657066782315572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,0,1,float16,float16,0,0.027402666707833607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,128,1,float16,fp8,0,0.027376001079877216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,0,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,128,1,fp8,fp8,0,0.02754666656255722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,0,1,fp8,fp8,0,0.025519999365011852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,128,1,float16,float16,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,0,1,float16,float16,0,0.021381333470344543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,128,1,float16,fp8,0,0.02000533292690913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,128,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,0,1,float16,fp8,0,0.020506666352351505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,0,1,fp8,fp8,0,0.019685332973798115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,128,1,float16,float16,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,0,1,float16,float16,0,0.019445333629846573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,128,1,float16,fp8,0,0.02147199958562851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,128,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,0,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,0,1,fp8,fp8,0,0.019999999552965164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,128,1,float16,fp8,0,0.020917333662509918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,128,1,fp8,fp8,0,0.020784000555674236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,0,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,128,1,float16,float16,0,0.021013334393501282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,0,1,fp8,fp8,0,0.02126399924357732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,0,1,float16,float16,0,0.021013334393501282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,128,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,128,1,fp8,fp8,0,0.019546666493018467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,0,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,0,1,fp8,fp8,0,0.020848001043001812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,128,1,float16,float16,0,0.01971199984351794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,0,1,float16,float16,0,0.019434666881958645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,128,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,128,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,0,1,float16,fp8,0,0.019440000255902607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,0,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,128,1,float16,float16,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,0,1,float16,float16,0,0.016224000602960587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,128,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,0,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,128,1,float16,float16,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,128,1,fp8,fp8,0,0.016303999970356624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,0,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,0,1,fp8,fp8,0,0.01545599972208341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,128,1,float16,float16,0,0.016085332880417507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,128,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,0,1,fp8,fp8,0,0.015674666812022526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,0,1,float16,float16,0,0.016602666427691776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,128,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,128,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,128,1,fp8,fp8,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,0,1,float16,fp8,0,0.016751999656359356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,0,1,fp8,fp8,0,0.016063999384641647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,128,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,128,1,fp8,fp8,0,0.015370666980743408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,0,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,0,1,float16,float16,0,0.01685333376129468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,128,1,float16,float16,0,0.01515199989080429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,128,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,128,1,float16,float16,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,128,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,128,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,128,1,float16,float16,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,0,1,fp8,fp8,0,0.016762666404247284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,128,1,float16,float16,0,0.015450666348139444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,128,1,float16,float16,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,128,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,128,1,float16,float16,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,0,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,0,1,float16,fp8,0,0.015418666104475657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,128,1,float16,float16,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,0,1,float16,float16,0,0.016837333639462788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,128,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,128,1,float16,float16,0,0.1810879906018575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,0,1,float16,float16,0,0.1795466740926107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,128,1,float16,fp8,0,0.18053332964579263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,128,1,fp8,fp8,0,0.16475199659665427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,0,1,float16,fp8,0,0.17938133080800375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,0,1,fp8,fp8,0,0.16491199533144632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,128,1,float16,float16,0,0.18145066499710083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,0,1,float16,float16,0,0.18093333641688028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,128,1,float16,fp8,0,0.17920533816019693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,128,1,fp8,fp8,0,0.16527466972668967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,0,1,fp8,fp8,0,0.1649493376413981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,0,1,float16,fp8,0,0.18079467614491782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,128,1,float16,float16,0,0.18266665935516357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,0,1,float16,float16,0,0.18131732940673828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,128,1,float16,fp8,0,0.1811359922091166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,128,1,fp8,fp8,0,0.16876800855000815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,0,1,fp8,fp8,0,0.16873600085576376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,0,1,float16,fp8,0,0.1811359922091166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,128,1,float16,float16,0,0.18164799610773721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,0,1,float16,float16,0,0.1811093290646871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,128,1,float16,fp8,0,0.17987199624379477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,128,1,fp8,fp8,0,0.16876266400019327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,128,1,float16,float16,0,0.09734933574994405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,0,1,float16,float16,0,0.09737599889437358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,0,1,fp8,fp8,0,0.1681013305981954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,0,1,float16,fp8,0,0.18152532974878946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,128,1,float16,fp8,0,0.09760533769925435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,128,1,fp8,fp8,0,0.09309867024421692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,0,1,float16,fp8,0,0.0978559950987498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,0,1,fp8,fp8,0,0.09355200330416362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,128,1,float16,float16,0,0.09729066491127014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,0,1,float16,float16,0,0.09744532903035481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,128,1,float16,fp8,0,0.09705600142478943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,128,1,fp8,fp8,0,0.09087999661763509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,0,1,float16,fp8,0,0.09737599889437358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,0,1,fp8,fp8,0,0.09098133444786072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,128,1,float16,float16,0,0.0988159974416097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,0,1,float16,float16,0,0.09916266798973083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,128,1,float16,fp8,0,0.09935999910036723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,128,1,fp8,fp8,0,0.09119466940561931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,0,1,float16,fp8,0,0.09911466638247173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,0,1,fp8,fp8,0,0.09105599919954936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,128,1,float16,float16,0,0.09962667028109233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,0,1,float16,float16,0,0.0993226667245229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,128,1,float16,fp8,0,0.09940266609191895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,128,1,fp8,fp8,0,0.09133332967758179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,0,1,float16,fp8,0,0.09898666540781657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,0,1,fp8,fp8,0,0.09286933143933614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,128,1,float16,float16,0,0.09729066491127014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,0,1,float16,float16,0,0.09730133414268494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,128,1,float16,fp8,0,0.0974720021088918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,128,1,fp8,fp8,0,0.09303999940554301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,0,1,float16,fp8,0,0.09734400113423665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,0,1,fp8,fp8,0,0.09293333689371745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,128,1,float16,float16,0,0.057855998476346336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,0,1,float16,float16,0,0.05817066629727682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,128,1,float16,fp8,0,0.0581279993057251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,128,1,fp8,fp8,0,0.05417066812515259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,0,1,float16,fp8,0,0.05805333455403646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,0,1,fp8,fp8,0,0.0543093333641688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,128,1,float16,float16,0,0.056090667843818665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,0,1,float16,float16,0,0.055946667989095054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,128,1,float16,fp8,0,0.05598933498064677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,128,1,fp8,fp8,0,0.0521066685517629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,0,1,float16,fp8,0,0.05597866574923197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,0,1,fp8,fp8,0,0.05193600058555603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,0,1,float16,float16,0,0.056277334690093994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,128,1,float16,float16,0,0.056287998954455055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,128,1,float16,fp8,0,0.05598400036493937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,0,1,float16,fp8,0,0.056405335664749146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,128,1,fp8,fp8,0,0.05194666484991709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,0,1,fp8,fp8,0,0.052202666799227394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,128,1,float16,float16,0,0.05796800057093302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,0,1,float16,float16,0,0.05752533177534739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,128,1,float16,fp8,0,0.05796800057093302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,128,1,fp8,fp8,0,0.05422399938106537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,0,1,float16,fp8,0,0.05784533421198527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,0,1,fp8,fp8,0,0.05420800050099691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,128,1,float16,float16,0,0.057434668143590294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,0,1,float16,float16,0,0.056234667698542275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,128,1,float16,fp8,0,0.0582826683918635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,128,1,fp8,fp8,0,0.05393599967161814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,0,1,float16,fp8,0,0.05754666527112325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,0,1,fp8,fp8,0,0.054085334142049156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,128,1,float16,float16,0,0.033674667278925575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,0,1,float16,float16,0,0.033728001018365227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,128,1,float16,fp8,0,0.03440533330043157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,128,1,fp8,fp8,0,0.03340800106525421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,0,1,float16,fp8,0,0.03387733300526937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,0,1,fp8,fp8,0,0.033258666594823204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,128,1,float16,float16,0,0.033887999753157295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,0,1,float16,float16,0,0.03342399994532267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,128,1,float16,fp8,0,0.03379199902216593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,128,1,fp8,fp8,0,0.03330666571855545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,0,1,float16,fp8,0,0.033813332517941795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,0,1,fp8,fp8,0,0.03159466634194056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,128,1,float16,float16,0,0.03533866753180822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,0,1,float16,float16,0,0.03536533315976461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,128,1,float16,fp8,0,0.03554133325815201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,128,1,fp8,fp8,0,0.032501332461833954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,0,1,float16,fp8,0,0.03549333413441976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,0,1,fp8,fp8,0,0.03356799980004629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,0,1,float16,float16,0,0.033957332372665405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,128,1,float16,fp8,0,0.03344533344109853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,128,1,float16,float16,0,0.03443199892838796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,128,1,fp8,fp8,0,0.03375466664632162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,0,1,float16,fp8,0,0.035546667873859406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,0,1,fp8,fp8,0,0.03127466638882955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,128,1,float16,float16,0,0.03416533271471659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,0,1,float16,float16,0,0.03380800038576126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,128,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,128,1,fp8,fp8,0,0.03331733246644338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,0,1,float16,fp8,0,0.03347733368476232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,0,1,fp8,fp8,0,0.033573334415753685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,128,1,float16,float16,0,0.025424001117547352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,0,1,float16,float16,0,0.02508266766866048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,128,1,float16,fp8,0,0.025455998877684276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,128,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,0,1,float16,fp8,0,0.025861332813898723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,0,1,fp8,fp8,0,0.02513599892457326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,128,1,float16,float16,0,0.025701334079106648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,0,1,float16,float16,0,0.025301332275072735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,128,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,128,1,fp8,fp8,0,0.025077333052953083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,0,1,float16,fp8,0,0.02532800038655599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,0,1,fp8,fp8,0,0.02550400048494339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,128,1,float16,float16,0,0.025279998779296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,0,1,float16,float16,0,0.024432001014550526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,128,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,128,1,fp8,fp8,0,0.023317334552605946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,0,1,float16,fp8,0,0.0252960001428922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,0,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,128,1,float16,float16,0,0.025311999022960663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,128,1,float16,fp8,0,0.025274666647116344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,0,1,float16,float16,0,0.025146665672461193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,128,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,0,1,float16,fp8,0,0.025098666548728943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,0,1,fp8,fp8,0,0.025285333395004272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,128,1,float16,float16,0,0.0252960001428922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,0,1,float16,float16,0,0.025274666647116344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,128,1,float16,fp8,0,0.025514667232831318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,128,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,0,1,float16,fp8,0,0.02514133354028066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,128,1,float16,float16,0,0.01952533299724261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,0,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,0,1,float16,float16,0,0.01915733392039935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,128,1,float16,fp8,0,0.02011200040578842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,128,1,fp8,fp8,0,0.0195573332409064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,0,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,128,1,float16,float16,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,0,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,0,1,float16,float16,0,0.018954666952292126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,128,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,128,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,0,1,float16,fp8,0,0.019093333433071773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,128,1,float16,float16,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,128,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,128,1,fp8,fp8,0,0.019509332875410717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,0,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,0,1,float16,float16,0,0.0189280000825723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,128,1,float16,fp8,0,0.019274666905403137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,128,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,0,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,128,1,float16,float16,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,0,1,float16,float16,0,0.018965333700180054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,128,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,128,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,0,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,128,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,128,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,128,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,0,1,fp8,fp8,0,0.016688000410795212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,128,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,128,1,float16,fp8,0,0.016735999534527462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,128,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,128,1,float16,float16,0,0.016794666647911072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,0,1,float16,float16,0,0.01602666700879733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,128,1,float16,float16,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,0,1,float16,float16,0,0.01587733378012975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,128,1,float16,fp8,0,0.017551999539136887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,128,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,128,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,128,1,float16,float16,0,0.015402667224407196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,128,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,128,1,fp8,fp8,0,0.015487999965747198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,0,1,fp8,fp8,0,0.016783999900023144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,128,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,128,1,float16,float16,0,0.016757333030303318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,128,1,float16,fp8,0,0.015674666812022526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,128,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,0,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,0,1,float16,float16,0,0.016528000434239704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,128,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,128,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,128,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,128,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,128,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,0,1,float16,fp8,0,0.015386667102575302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,128,1,float16,float16,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,0,1,fp8,fp8,0,0.015919999529918034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,128,1,float16,float16,0,0.015344000111023584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,128,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,128,1,float16,float16,0,0.015450666348139444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,128,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,0,0.1548906664053599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,0,0.15481600165367126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,0,0.15494400262832642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,64,128,1,fp8,fp8,0,0.14231466253598532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,0,0.1553546686967214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,64,0,1,fp8,fp8,0,0.1420960028966268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,0,0.1553759972254435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,0,0.15502933661142984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,0,0.155130664507548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,64,128,1,fp8,fp8,0,0.14243200421333313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,0,0.15618667006492615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,64,0,1,fp8,fp8,0,0.1421119968096415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,0,0.15522133310635886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,0,0.15467733144760132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,0,0.15478932857513428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,64,128,1,fp8,fp8,0,0.1422719955444336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,0,0.15465066830317178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,64,0,1,fp8,fp8,0,0.14216533303260803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,0,0.15550933281580606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,0,0.15460800131162009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,0,0.1545866628487905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,64,128,1,fp8,fp8,0,0.14232533176740012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,64,0,1,fp8,fp8,0,0.14229333400726318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,0,0.1548960010210673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,64,128,1,float16,float16,0,0.08476266264915466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,0,0.08505599697430928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,64,128,1,float16,fp8,0,0.0849226713180542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,64,128,1,fp8,fp8,0,0.07717866698900859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,0,0.08442667126655579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,64,0,1,fp8,fp8,0,0.07865599791208903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,0,0.08292266726493835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,0,0.082805335521698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,64,128,1,fp8,fp8,0,0.07689066727956136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,0,0.08523733417193095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,0,0.08490133285522461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,64,0,1,fp8,fp8,0,0.07838400204976399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,0,0.08328000207742055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,0,0.0855466624101003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,0,0.08462933699289958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,64,128,1,fp8,fp8,0,0.07866133252779643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,0,0.08497066299120586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,0,0.08505066235860188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,64,0,1,fp8,fp8,0,0.07730133334795634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,0,0.08497066299120586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,0,0.08515200018882751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,64,128,1,fp8,fp8,0,0.07855999966462453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,64,0,1,fp8,fp8,0,0.0784853349129359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,0,0.08483200271924336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,0,0.08461333314577739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,0,0.08489599823951721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,0,0.08319999774297078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,64,128,1,fp8,fp8,0,0.07864533364772797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,0,0.08482666810353597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,64,128,1,float16,float16,0,0.04977599779764811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,0,0.0498879998922348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,64,0,1,fp8,fp8,0,0.07869866490364075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,64,128,1,float16,fp8,0,0.050026665131251015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,64,128,1,fp8,fp8,0,0.0459146648645401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,0,0.05017066498597463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,64,0,1,fp8,fp8,0,0.04593066871166229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,0,0.050016000866889954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,0,0.04978133241335551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,0,0.049925332268079124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,64,128,1,fp8,fp8,0,0.04619733492533366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,0,0.04994133114814758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,64,0,1,fp8,fp8,0,0.04753600060939789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,0,0.04983466863632202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,0,0.04996266464392344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,0,0.050154666105906166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,64,128,1,fp8,fp8,0,0.045781334241231285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,0,0.050442665815353394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,64,0,1,fp8,fp8,0,0.04776533444722494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,0,0.05014933149019877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,0,0.04995200037956238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,0,0.05020266771316528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,64,128,1,fp8,fp8,0,0.04816000163555145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,0,0.05012266834576925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,64,0,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,0,0.050053333242734276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,0,0.05017066498597463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,0,0.050341332952181496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,64,128,1,fp8,fp8,0,0.04770666857560476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,0,0.05004799862702688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,64,0,1,fp8,fp8,0,0.046298667788505554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,64,128,1,float16,float16,0,0.03145066648721695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,0,0.03209066639343897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,64,128,1,float16,fp8,0,0.03161599983771642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,64,128,1,fp8,fp8,0,0.029882666965325672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,0,0.033376000821590424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,64,0,1,fp8,fp8,0,0.031285333136717476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,0,0.03141333411137263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,0,0.03346666693687439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,0,0.033471999069054924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,64,128,1,fp8,fp8,0,0.029850666721661884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,0,0.033717334270477295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,64,0,1,fp8,fp8,0,0.029792000850041706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,0,0.03275733441114426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,0,0.03153600047032038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,0,0.03165333221356074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,64,128,1,fp8,fp8,0,0.03145600110292435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,0,0.03179733455181122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,64,0,1,fp8,fp8,0,0.029951999584833782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,0,0.031583999594052635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,0,0.03146666785081228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,0,0.03161066770553589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,64,128,1,fp8,fp8,0,0.029658667743206024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,64,0,1,fp8,fp8,0,0.03154666721820831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,0,0.03365866591533025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,0,0.03331733246644338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,0,0.03341866781314214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,64,128,1,fp8,fp8,0,0.031258667508761086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,0,0.03356266766786575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,64,0,1,fp8,fp8,0,0.0296426663796107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,64,128,1,float16,float16,0,0.02555199960867564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,0,0.023050665855407715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,64,128,1,float16,fp8,0,0.02345066765944163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,64,128,1,fp8,fp8,0,0.023562667270501454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,64,0,1,fp8,fp8,0,0.023685333629449207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,0,0.023455999791622162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,0,0.025407999753952026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,64,128,1,fp8,fp8,0,0.02330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,0,0.025573333104451496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,64,0,1,fp8,fp8,0,0.02369066576162974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,0,0.023557332654794056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,0,0.025301332275072735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,0,0.02508266766866048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,64,128,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,0,0.02554133286078771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,64,0,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,0,0.024869332710901897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,0,0.023445333043734234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,64,128,1,fp8,fp8,0,0.02348266790310542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,64,0,1,fp8,fp8,0,0.023413332800070446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,0,0.025050667424996693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,64,128,1,fp8,fp8,0,0.023658665517965954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,0,0.023365333676338196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,64,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,64,0,1,fp8,fp8,0,0.023418667415777843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,64,128,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,64,128,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,0,0.020997333029905956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,64,0,1,fp8,fp8,0,0.019386666516462963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,64,128,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,0,0.02063999945918719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,64,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,0,0.01977066695690155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,0,0.019424000134070713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,64,128,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,64,0,1,fp8,fp8,0,0.018719999740521114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,0,0.01941866676012675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,64,128,1,fp8,fp8,0,0.019573333362738293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,0,0.0206986665725708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,64,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,0,0.01923199991385142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,0,0.020021333048741024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,0,0.02082666630546252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,64,128,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,64,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,0,0.01982933282852173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,64,128,1,fp8,fp8,0,0.015850666910409927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,0,0.015354666858911514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,64,128,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,64,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,64,128,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,64,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,64,0,1,fp8,fp8,0,0.016229332735141117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,64,128,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,64,0,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,64,128,1,float16,float16,0,0.014954666296641031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,64,128,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,64,128,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,64,128,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,0,0.01664000004529953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,64,128,1,fp8,fp8,0,0.016362667083740234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,64,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,64,128,1,fp8,fp8,0,0.015829333414634068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,0,0.016271999726692837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,0,0.015317333241303762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,64,128,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,64,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,64,128,1,float16,float16,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,0,0.016751999656359356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,64,128,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,64,128,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,0,0.015034666905800501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,64,128,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,64,0,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,0,0.014959999670584997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,64,0,1,fp8,fp8,0,0.015541333705186844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,0,0.01670933390657107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,64,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,64,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,128,1,float16,float16,0,1.356122652689616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,128,1,float16,fp8,0,1.369157314300537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,128,1,fp8,fp8,0,1.2349013487497966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,0,1,float16,float16,0,8.35092290242513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,0,1,float16,fp8,0,8.115989049275717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,128,1,float16,float16,0,1.3780266443888347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,0,1,fp8,fp8,0,7.404805501302083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,128,1,float16,fp8,0,1.3878666559855144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,128,1,fp8,fp8,0,1.2588053544362385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,128,1,float16,float16,0,1.3886720339457195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,0,1,float16,float16,0,8.018128077189127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,128,1,float16,fp8,0,1.4012266794840496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,0,1,fp8,fp8,0,7.4278717041015625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,0,1,float16,fp8,0,8.238639831542969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,128,1,fp8,fp8,0,1.2747093041737874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,0,1,float16,float16,0,8.457045237223307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,128,1,float16,float16,0,1.4256426493326824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,128,1,float16,fp8,0,1.4384106000264485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,128,1,fp8,fp8,0,1.3176960150400798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,0,1,fp8,fp8,0,7.444234848022461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,0,1,float16,fp8,0,8.319599787394205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,128,1,float16,float16,0,0.803925355275472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,0,1,float16,float16,0,8.451290766398111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,128,1,float16,fp8,0,0.8221973578135172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,128,1,fp8,fp8,0,0.7636906305948893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,0,1,float16,float16,0,4.251354535420735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,0,1,float16,fp8,0,8.094714482625326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,0,1,fp8,fp8,0,7.489706675211589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,128,1,float16,float16,0,0.7173386414845785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,128,1,float16,fp8,0,0.7223359743754069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,0,1,fp8,fp8,0,3.8983945846557617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,128,1,fp8,fp8,0,0.6551040013631185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,0,1,float16,fp8,0,4.54691727956136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,0,1,float16,float16,0,4.291098594665527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,128,1,float16,float16,0,0.7215092976888021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,128,1,float16,fp8,0,0.7279626528422037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,128,1,fp8,fp8,0,0.6626933415730795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,0,1,fp8,fp8,0,3.7885920206705728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,0,1,float16,fp8,0,4.2682186762491865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,128,1,float16,float16,0,0.7280159791310629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,0,1,float16,float16,0,4.231391906738281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,128,1,float16,fp8,0,0.737061341603597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,0,1,fp8,fp8,0,3.795935948689779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,128,1,fp8,fp8,0,0.6711359818776449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,0,1,float16,fp8,0,4.327269236246745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,128,1,float16,float16,0,0.7428212960561117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,0,1,float16,float16,0,4.1331892013549805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,128,1,float16,fp8,0,0.7525973320007324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,128,1,fp8,fp8,0,0.6889599959055582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,0,1,fp8,fp8,0,3.803173383076986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,0,1,float16,fp8,0,4.121232032775879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,128,1,float16,float16,0,0.4477866490681966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,0,1,float16,float16,0,4.292426745096843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,128,1,float16,fp8,0,0.45869867006937665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,0,1,float16,float16,0,2.193984031677246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,128,1,fp8,fp8,0,0.4288959900538127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,0,1,fp8,fp8,0,3.82260799407959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,0,1,float16,fp8,0,4.12877877553304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,128,1,float16,float16,0,0.406826655069987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,128,1,float16,fp8,0,0.4084959824879964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,0,1,fp8,fp8,0,2.042773405710856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,0,1,float16,fp8,0,2.205904006958008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,128,1,fp8,fp8,0,0.3754986524581909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,0,1,float16,float16,0,2.139392058054606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,128,1,float16,float16,0,0.4071520169576009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,128,1,float16,fp8,0,0.4105653365453084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,0,1,fp8,fp8,0,1.9908372561136882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,0,1,float16,fp8,0,2.2341599464416504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,128,1,fp8,fp8,0,0.3796000083287557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,0,1,float16,float16,0,2.1591199239095054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,128,1,float16,float16,0,0.412714680035909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,128,1,float16,fp8,0,0.41570667425791424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,0,1,fp8,fp8,0,1.9922240575154622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,0,1,float16,fp8,0,2.147461255391439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,128,1,fp8,fp8,0,0.38393068313598633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,0,1,float16,float16,0,2.1481760342915854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,128,1,float16,float16,0,0.41788268089294434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,128,1,float16,fp8,0,0.42315733432769775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,0,1,fp8,fp8,0,1.9977866808573406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,0,1,float16,fp8,0,2.17138671875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,128,1,fp8,fp8,0,0.3928106625874837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,128,1,float16,float16,0,0.31445332368214923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,0,1,float16,float16,0,2.1587840716044107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,128,1,float16,fp8,0,0.3174826701482137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,0,1,fp8,fp8,0,2.006768067677816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,0,1,float16,fp8,0,2.165162722269694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,128,1,fp8,fp8,0,0.29568533102671307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,0,1,float16,float16,0,1.2378133138020833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,128,1,float16,float16,0,0.3141439954439799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,128,1,float16,fp8,0,0.31410133838653564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,0,1,fp8,fp8,0,1.1492479642232258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,0,1,float16,fp8,0,1.2416640122731526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,0,1,float16,float16,0,1.2323040167490642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,128,1,fp8,fp8,0,0.29392000039418537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,128,1,float16,float16,0,0.31433600187301636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,0,1,fp8,fp8,0,1.1474186579386394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,0,1,float16,fp8,0,1.2350133260091145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,128,1,float16,fp8,0,0.3139359951019287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,0,1,float16,float16,0,1.2323946952819824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,128,1,fp8,fp8,0,0.29312000672022503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,128,1,float16,float16,0,0.31329067548116046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,0,1,float16,fp8,0,1.2346933682759602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,0,1,fp8,fp8,0,1.1466399828592937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,128,1,float16,fp8,0,0.31405866146087646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,0,1,float16,float16,0,1.2351893583933513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,128,1,fp8,fp8,0,0.29389333724975586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,128,1,float16,float16,0,0.3142186601956685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,0,1,float16,fp8,0,1.2341066996256511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,0,1,fp8,fp8,0,1.1470080216725667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,128,1,float16,fp8,0,0.31622399886449176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,128,1,fp8,fp8,0,0.2940160036087036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,0,1,float16,float16,0,1.2362666924794514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,0,1,float16,fp8,0,1.237338701883952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,0,1,fp8,fp8,0,1.1474293073018391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,128,1,float16,float16,0,1.0174773534138997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,128,1,float16,fp8,0,1.0252906481424968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,128,1,fp8,fp8,0,0.9228959878285726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,128,1,float16,float16,0,1.0248853365580242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,0,1,float16,float16,0,4.866282780965169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,0,1,fp8,fp8,0,4.381104151407878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,128,1,float16,fp8,0,1.0346293449401855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,0,1,float16,fp8,0,4.863322575887044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,128,1,fp8,fp8,0,0.9370613098144531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,128,1,float16,float16,0,1.034336010615031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,0,1,float16,float16,0,4.746416091918945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,128,1,float16,fp8,0,1.0450186729431152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,0,1,fp8,fp8,0,4.39577070871989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,0,1,float16,fp8,0,4.789967854817708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,128,1,fp8,fp8,0,0.9493546485900879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,128,1,float16,float16,0,1.059285322825114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,0,1,float16,float16,0,4.845114707946777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,128,1,float16,fp8,0,1.070757309595744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,0,1,fp8,fp8,0,4.406405448913574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,0,1,float16,fp8,0,4.894970575968425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,128,1,fp8,fp8,0,0.9822986920674642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,128,1,float16,float16,0,0.6073813438415527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,0,1,float16,float16,0,4.790442784627278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,128,1,float16,fp8,0,0.6210879882176717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,0,1,float16,float16,0,2.5203359921773276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,128,1,fp8,fp8,0,0.5764319896697998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,0,1,fp8,fp8,0,4.440906524658203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,0,1,float16,fp8,0,4.80735460917155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,128,1,float16,float16,0,0.5420426527659098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,128,1,float16,fp8,0,0.5469813346862793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,0,1,fp8,fp8,0,2.344053268432617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,0,1,float16,fp8,0,2.547167936960856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,128,1,fp8,fp8,0,0.49664533138275146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,0,1,float16,float16,0,2.438447952270508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,128,1,float16,float16,0,0.5439786513646444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,128,1,float16,fp8,0,0.5504800081253052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,0,1,float16,fp8,0,2.4556800524393716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,0,1,fp8,fp8,0,2.262880007425944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,128,1,fp8,fp8,0,0.5023839871088663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,0,1,float16,float16,0,2.443434715270996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,128,1,float16,float16,0,0.549946665763855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,128,1,float16,fp8,0,0.5568960110346476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,0,1,fp8,fp8,0,2.26692263285319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,0,1,float16,fp8,0,2.4919466972351074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,128,1,fp8,fp8,0,0.507418672243754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,0,1,float16,float16,0,2.4516746203104653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,128,1,float16,float16,0,0.5605066617329916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,128,1,float16,fp8,0,0.5685333410898844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,0,1,fp8,fp8,0,2.274197260538737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,0,1,float16,fp8,0,2.6318880716959634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,128,1,fp8,fp8,0,0.5216853221257528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,128,1,float16,float16,0,0.34004799524943036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,0,1,float16,float16,0,2.5344212849934897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,128,1,float16,fp8,0,0.3487893342971802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,128,1,fp8,fp8,0,0.32710399230321247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,0,1,float16,float16,0,1.335279941558838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,0,1,fp8,fp8,0,2.285909334818522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,0,1,float16,fp8,0,2.474789301554362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,128,1,float16,float16,0,0.30614932378133136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,128,1,float16,fp8,0,0.30986666679382324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,0,1,float16,fp8,0,1.3436959584554036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,0,1,fp8,fp8,0,1.246666669845581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,0,1,float16,float16,0,1.2946080366770427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,128,1,fp8,fp8,0,0.2874666651089986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,128,1,float16,float16,0,0.3099413315455119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,0,1,fp8,fp8,0,1.2047359943389893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,128,1,float16,fp8,0,0.3112799922625224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,0,1,float16,fp8,0,1.2950026988983154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,0,1,float16,float16,0,1.2956213156382244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,128,1,fp8,fp8,0,0.2897226611773173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,128,1,float16,float16,0,0.31410666306813556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,0,1,fp8,fp8,0,1.207898696263631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,0,1,float16,fp8,0,1.2993333339691162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,128,1,float16,fp8,0,0.3164373238881429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,128,1,fp8,fp8,0,0.2919360001881917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,0,1,float16,float16,0,1.3025600115458171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,128,1,float16,float16,0,0.31828800837198895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,0,1,float16,fp8,0,1.3067466417948406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,0,1,fp8,fp8,0,1.2100373109181721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,128,1,float16,fp8,0,0.322378675142924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,0,1,float16,float16,0,1.3086026509602864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,128,1,fp8,fp8,0,0.30002667506535846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,128,1,float16,float16,0,0.23884799083073935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,0,1,float16,fp8,0,1.3145439624786377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,128,1,float16,fp8,0,0.23960532744725546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,0,1,fp8,fp8,0,1.2177386283874512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,0,1,float16,float16,0,0.7794880072275797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,128,1,fp8,fp8,0,0.226090669631958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,128,1,float16,float16,0,0.2363040049870809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,0,1,fp8,fp8,0,0.7224319775899252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,0,1,float16,fp8,0,0.7790453433990479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,128,1,float16,fp8,0,0.2379466692606608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,128,1,fp8,fp8,0,0.2217386762301127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,0,1,float16,float16,0,0.7746666272481283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,128,1,float16,float16,0,0.23627734184265137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,0,1,fp8,fp8,0,0.7193439801534017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,0,1,float16,fp8,0,0.772981325785319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,128,1,float16,fp8,0,0.23655466238657633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,128,1,fp8,fp8,0,0.22410666942596436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,0,1,float16,float16,0,0.7747626304626465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,128,1,float16,float16,0,0.23751999934514365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,0,1,fp8,fp8,0,0.7201600074768066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,0,1,float16,fp8,0,0.7743519941965739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,128,1,float16,fp8,0,0.2383306622505188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,128,1,fp8,fp8,0,0.2222506602605184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,0,1,float16,float16,0,0.7755573590596517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,128,1,float16,float16,0,0.2387253244717916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,0,1,fp8,fp8,0,0.7186400095621744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,0,1,float16,fp8,0,0.774399995803833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,128,1,fp8,fp8,0,0.22414400180180868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,128,1,float16,fp8,0,0.23811733722686768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,0,1,float16,float16,0,0.7774879932403564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,0,1,fp8,fp8,0,0.719808022181193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,0,1,float16,fp8,0,0.7753386497497559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,128,1,float16,float16,0,0.8485120137532552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,128,1,float16,fp8,0,0.8540960152943929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,128,1,fp8,fp8,0,0.7704266707102457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,128,1,float16,float16,0,0.8538239796956381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,0,1,float16,float16,0,3.580080032348633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,0,1,fp8,fp8,0,3.1612745920817056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,128,1,float16,fp8,0,0.8628693421681722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,0,1,float16,fp8,0,3.6287307739257812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,128,1,fp8,fp8,0,0.779690663019816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,0,1,float16,float16,0,3.431349436442057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,128,1,float16,float16,0,0.8632799784342448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,128,1,float16,fp8,0,0.8726560274759928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,0,1,fp8,fp8,0,3.1716906229654946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,0,1,float16,fp8,0,3.534330685933431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,128,1,fp8,fp8,0,0.7916053136189779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,0,1,float16,float16,0,3.442490577697754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,128,1,float16,float16,0,0.8828426996866862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,128,1,float16,fp8,0,0.8923306465148926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,0,1,fp8,fp8,0,3.181935946146647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,0,1,float16,fp8,0,3.608213424682617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,128,1,fp8,fp8,0,0.8153386910756429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,128,1,float16,float16,0,0.5088106791178385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,0,1,float16,float16,0,3.4651947021484375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,128,1,float16,fp8,0,0.5198239882787069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,128,1,fp8,fp8,0,0.4838453531265259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,0,1,float16,float16,0,1.8369706471761067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,0,1,fp8,fp8,0,3.207066535949707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,0,1,float16,fp8,0,3.572335879007975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,128,1,float16,float16,0,0.4549706776936849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,0,1,float16,fp8,0,1.8515733083089192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,128,1,float16,fp8,0,0.4586079915364583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,0,1,fp8,fp8,0,1.7099307378133137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,0,1,float16,float16,0,1.7982187271118164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,128,1,fp8,fp8,0,0.41843199729919434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,128,1,float16,float16,0,0.45723732312520343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,0,1,float16,fp8,0,1.7758933703104656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,0,1,fp8,fp8,0,1.6434826850891113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,128,1,float16,fp8,0,0.46138668060302734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,0,1,float16,float16,0,1.7781972885131836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,128,1,fp8,fp8,0,0.42291732629140216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,128,1,float16,float16,0,0.4617600043614705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,0,1,fp8,fp8,0,1.646133263905843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,0,1,float16,fp8,0,1.7791147232055664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,128,1,float16,fp8,0,0.46776000658671063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,128,1,fp8,fp8,0,0.4270933469136556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,0,1,float16,float16,0,1.7823573748270671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,128,1,float16,float16,0,0.4698293209075928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,0,1,fp8,fp8,0,1.6533652941385906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,0,1,float16,fp8,0,1.78658660252889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,128,1,float16,fp8,0,0.476362665494283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,0,1,float16,float16,0,1.7976959546407063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,128,1,fp8,fp8,0,0.43721600373586017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,128,1,float16,float16,0,0.28576000531514484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,128,1,float16,fp8,0,0.29282132784525555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,0,1,float16,float16,0,0.9825599988301595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,0,1,fp8,fp8,0,1.6643199920654297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,0,1,float16,fp8,0,1.8005706469217937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,128,1,fp8,fp8,0,0.2751680016517639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,128,1,float16,float16,0,0.25518399477005005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,128,1,float16,fp8,0,0.25684799750645954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,0,1,fp8,fp8,0,0.9199946721394857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,0,1,float16,fp8,0,0.9893066883087158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,0,1,float16,float16,0,0.9484373728434244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,128,1,fp8,fp8,0,0.24050666888554892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,128,1,float16,float16,0,0.25673067569732666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,0,1,fp8,fp8,0,0.8844266732533773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,0,1,float16,fp8,0,0.9506613413492838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,128,1,float16,fp8,0,0.25889599323272705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,0,1,float16,float16,0,0.9493760267893473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,128,1,fp8,fp8,0,0.24276266495386759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,128,1,float16,float16,0,0.2609013319015503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,0,1,float16,fp8,0,0.9517866770426432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,0,1,fp8,fp8,0,0.8875412940979004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,128,1,float16,fp8,0,0.2632319927215576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,0,1,float16,float16,0,0.9550560315450033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,128,1,fp8,fp8,0,0.24660799900690714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,128,1,float16,float16,0,0.2672053376833598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,0,1,float16,fp8,0,0.9579626719156901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,0,1,fp8,fp8,0,0.8891839981079102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,128,1,float16,fp8,0,0.27077333132425946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,128,1,fp8,fp8,0,0.2527359922726949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,0,1,float16,float16,0,0.9627573490142822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,128,1,float16,float16,0,0.2057653268178304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,0,1,float16,fp8,0,0.9663519859313965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,0,1,fp8,fp8,0,0.8962186972300211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,0,1,float16,float16,0,0.5858933528264364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,128,1,float16,fp8,0,0.20497065782546997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,128,1,fp8,fp8,0,0.19148266315460205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,128,1,float16,float16,0,0.2016213337580363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,0,1,float16,fp8,0,0.5864160060882568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,0,1,fp8,fp8,0,0.5436053276062012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,128,1,float16,fp8,0,0.2015519936879476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,128,1,fp8,fp8,0,0.1891146699587504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,0,1,float16,float16,0,0.5806080102920532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,128,1,float16,float16,0,0.19996267557144165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,0,1,fp8,fp8,0,0.5419040123621622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,0,1,float16,fp8,0,0.5819146633148193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,128,1,float16,fp8,0,0.20169599850972494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,128,1,fp8,fp8,0,0.1893333395322164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,0,1,float16,float16,0,0.5801973342895508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,128,1,float16,float16,0,0.19989866018295288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,0,1,fp8,fp8,0,0.5420746803283691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,0,1,float16,fp8,0,0.5819893280665079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,128,1,float16,fp8,0,0.20146133502324423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,128,1,fp8,fp8,0,0.1892533302307129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,0,1,float16,float16,0,0.5809760093688965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,128,1,float16,float16,0,0.20376533269882202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,0,1,fp8,fp8,0,0.5411306619644165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,0,1,float16,fp8,0,0.5831040143966675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,128,1,float16,fp8,0,0.20302400986353555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,128,1,fp8,fp8,0,0.18941332896550497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,0,1,float16,float16,0,0.5832320054372152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,0,1,float16,fp8,0,0.5851786533991495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,0,1,fp8,fp8,0,0.541541337966919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,128,1,float16,float16,0,1.3211733500162761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,128,1,float16,fp8,0,1.3322186470031738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,128,1,fp8,fp8,0,1.1976266702016194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,128,1,float16,float16,0,1.3406507174173992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,0,1,float16,float16,0,4.602693239847819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,0,1,fp8,fp8,0,4.192437489827474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,0,1,float16,fp8,0,4.749818801879883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,128,1,float16,fp8,0,1.353450616200765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,128,1,fp8,fp8,0,1.2245386441548665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,0,1,float16,float16,0,4.762597401936849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,128,1,float16,float16,0,1.3550507227579753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,128,1,float16,fp8,0,1.3644372622172039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,0,1,fp8,fp8,0,4.216325441996257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,0,1,float16,fp8,0,4.7958879470825195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,128,1,fp8,fp8,0,1.238645315170288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,0,1,float16,float16,0,4.713040033976237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,128,1,float16,float16,0,1.3927359580993652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,0,1,fp8,fp8,0,4.230160077412923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,128,1,float16,fp8,0,1.404421329498291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,0,1,float16,fp8,0,4.593471844991048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,128,1,fp8,fp8,0,1.2842506567637126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,128,1,float16,float16,0,0.7704319953918457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,0,1,float16,float16,0,4.833669344584147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,128,1,float16,fp8,0,0.786298672358195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,0,1,fp8,fp8,0,4.277519861857097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,128,1,fp8,fp8,0,0.7291786670684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,0,1,float16,fp8,0,4.644325256347656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,0,1,float16,float16,0,2.4590986569722495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,128,1,float16,float16,0,0.6808640162150065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,128,1,float16,fp8,0,0.6862719853719076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,0,1,fp8,fp8,0,2.2505547205607095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,0,1,float16,fp8,0,2.4394240379333496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,128,1,fp8,fp8,0,0.6195306777954102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,0,1,float16,float16,0,2.4625813166300454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,128,1,float16,float16,0,0.6859412988026937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,0,1,fp8,fp8,0,2.140218734741211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,128,1,float16,fp8,0,0.692799965540568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,0,1,float16,fp8,0,2.3830666542053223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,0,1,float16,float16,0,2.4443413416544595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,128,1,fp8,fp8,0,0.6274986664454142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,128,1,float16,float16,0,0.7154613335927328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,0,1,fp8,fp8,0,2.147610664367676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,0,1,float16,fp8,0,2.3495893478393555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,128,1,float16,fp8,0,0.7002399762471517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,128,1,fp8,fp8,0,0.6356426477432251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,0,1,float16,float16,0,2.334810733795166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,128,1,float16,float16,0,0.7084533373514811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,0,1,float16,fp8,0,2.395077387491862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,0,1,fp8,fp8,0,2.154138724009196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,128,1,float16,fp8,0,0.7165653705596924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,0,1,float16,float16,0,2.355722745259603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,128,1,fp8,fp8,0,0.6539573272069296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,128,1,float16,float16,0,0.4113279978434245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,128,1,float16,fp8,0,0.42027731736501056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,0,1,float16,float16,0,1.2617013454437256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,0,1,fp8,fp8,0,2.1740852991739907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,0,1,float16,fp8,0,2.4773759841918945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,128,1,fp8,fp8,0,0.391973336537679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,128,1,float16,float16,0,0.36531734466552734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,128,1,float16,fp8,0,0.3694346745808919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,0,1,float16,fp8,0,1.270085334777832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,0,1,fp8,fp8,0,1.1743786334991455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,0,1,float16,float16,0,1.2103359699249268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,128,1,fp8,fp8,0,0.33876800537109375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,128,1,float16,float16,0,0.3693173329035441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,0,1,float16,fp8,0,1.2114293575286865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,0,1,fp8,fp8,0,1.1220800081888835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,128,1,float16,fp8,0,0.37353066603342694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,128,1,fp8,fp8,0,0.34138667583465576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,0,1,float16,float16,0,1.2122506300608318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,128,1,float16,float16,0,0.3731893301010132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,0,1,float16,fp8,0,1.2169546286265056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,0,1,fp8,fp8,0,1.1253386338551838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,128,1,float16,fp8,0,0.3762400150299072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,128,1,fp8,fp8,0,0.3455893198649089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,0,1,float16,float16,0,1.217077334721883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,128,1,float16,float16,0,0.3797760009765625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,0,1,fp8,fp8,0,1.1298027038574219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,0,1,float16,fp8,0,1.2213866710662842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,128,1,float16,fp8,0,0.3861813147862752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,128,1,fp8,fp8,0,0.3556266625722249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,0,1,float16,float16,0,1.227397362391154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,128,1,float16,float16,0,0.2341759999593099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,0,1,fp8,fp8,0,1.1388800144195557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,0,1,float16,fp8,0,1.234613339106242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,128,1,float16,fp8,0,0.24196267127990723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,128,1,fp8,fp8,0,0.2262773315111796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,0,1,float16,float16,0,0.6852906545003256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,128,1,float16,float16,0,0.2099626660346985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,0,1,float16,fp8,0,0.692021369934082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,0,1,fp8,fp8,0,0.6416693528493246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,128,1,float16,fp8,0,0.20962133010228476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,0,1,float16,float16,0,0.65447465578715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,128,1,fp8,fp8,0,0.1970400015513102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,128,1,float16,float16,0,0.20974934101104736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,0,1,float16,fp8,0,0.6569226582845052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,0,1,fp8,fp8,0,0.611413319905599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,0,1,float16,float16,0,0.6563200155893961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,128,1,float16,fp8,0,0.2110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,128,1,fp8,fp8,0,0.1975946625073751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,128,1,float16,float16,0,0.2138026754061381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,0,1,float16,fp8,0,0.6582613388697306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,0,1,fp8,fp8,0,0.6136586666107178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,128,1,float16,fp8,0,0.21593066056569418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,0,1,float16,float16,0,0.6614826520284017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,128,1,fp8,fp8,0,0.20269866784413657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,128,1,float16,float16,0,0.21916266282399496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,0,1,float16,fp8,0,0.6621333360671997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,0,1,fp8,fp8,0,0.6172746817270914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,0,1,float16,float16,0,0.6665546496709188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,128,1,float16,fp8,0,0.2221333384513855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,128,1,fp8,fp8,0,0.20769067605336508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,128,1,float16,float16,0,0.16875733931859335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,0,1,float16,fp8,0,0.6706293423970541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,0,1,float16,float16,0,0.4222293297449748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,128,1,float16,fp8,0,0.1673439939816793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,0,1,fp8,fp8,0,0.621610681215922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,128,1,fp8,fp8,0,0.15773333112398782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,128,1,float16,float16,0,0.1653333306312561
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,0,1,float16,fp8,0,0.4212213357289632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,0,1,fp8,fp8,0,0.3882613182067871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,0,1,float16,float16,0,0.41551466782887775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,128,1,float16,fp8,0,0.16566933194796243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,128,1,fp8,fp8,0,0.1546986699104309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,128,1,float16,float16,0,0.16614933808644614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,0,1,float16,fp8,0,0.4170080025990804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,0,1,fp8,fp8,0,0.38570133845011395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,0,1,float16,float16,0,0.4183093309402466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,128,1,float16,fp8,0,0.16659733653068542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,128,1,fp8,fp8,0,0.15421866377194723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,0,1,float16,fp8,0,0.41659732659657794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,128,1,float16,float16,0,0.16492266456286112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,0,1,fp8,fp8,0,0.38782934347788495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,0,1,float16,float16,0,0.41732267538706463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,128,1,fp8,fp8,0,0.15430933237075806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,128,1,float16,fp8,0,0.1648426651954651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,128,1,float16,float16,0,0.1646986703077952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,0,1,float16,fp8,0,0.418448011080424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,0,1,fp8,fp8,0,0.3869066635767619
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,0,1,float16,float16,0,0.4169173240661621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,128,1,float16,fp8,0,0.16569599509239197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,128,1,fp8,fp8,0,0.1562933325767517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,0,1,float16,fp8,0,0.4185440142949422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,0,1,fp8,fp8,0,0.38601601123809814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,128,1,float16,float16,0,0.9892053604125977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,128,1,float16,fp8,0,0.9987093607584635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,128,1,fp8,fp8,0,0.8947093486785889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,128,1,float16,float16,0,0.9987680117289225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,0,1,float16,float16,0,2.771786689758301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,0,1,float16,fp8,0,2.777872085571289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,128,1,float16,fp8,0,1.0064266522725422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,0,1,fp8,fp8,0,2.544816017150879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,128,1,fp8,fp8,0,0.910261313120524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,0,1,float16,float16,0,2.8310934702555337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,128,1,float16,float16,0,1.00764266649882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,0,1,fp8,fp8,0,2.559744040171305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,0,1,float16,fp8,0,2.7893813451131186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,128,1,float16,fp8,0,1.0170506636301677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,128,1,fp8,fp8,0,0.9215146700541178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,0,1,float16,float16,0,2.7937065760294595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,128,1,float16,float16,0,1.0333066781361897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,0,1,float16,fp8,0,2.8031787872314453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,0,1,fp8,fp8,0,2.5725706418355307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,128,1,float16,fp8,0,1.0455040136973064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,128,1,fp8,fp8,0,0.9552106857299805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,0,1,float16,float16,0,2.822533289591471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,128,1,float16,float16,0,0.5823466777801514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,0,1,float16,fp8,0,2.836218516031901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,0,1,fp8,fp8,0,2.6055520375569663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,0,1,float16,float16,0,1.5009546279907227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,128,1,float16,fp8,0,0.5961066484451294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,128,1,fp8,fp8,0,0.5500959952672323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,128,1,float16,float16,0,0.5144000053405762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,0,1,float16,fp8,0,1.5139466921488445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,0,1,fp8,fp8,0,1.3951412836710613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,128,1,float16,fp8,0,0.518671989440918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,128,1,fp8,fp8,0,0.4700266520182292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,0,1,float16,float16,0,1.423103968302409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,128,1,float16,float16,0,0.519760012626648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,0,1,fp8,fp8,0,1.3117813269297283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,0,1,float16,fp8,0,1.4285492897033691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,128,1,float16,fp8,0,0.5250080029169718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,128,1,fp8,fp8,0,0.4755466779073079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,0,1,float16,float16,0,1.4291307131449382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,128,1,float16,float16,0,0.5247999827067057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,0,1,float16,fp8,0,1.434000015258789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,0,1,fp8,fp8,0,1.3191680113474529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,128,1,float16,fp8,0,0.5306346813837687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,128,1,fp8,fp8,0,0.4819999933242798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,0,1,float16,float16,0,1.4362932840983074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,128,1,float16,float16,0,0.5366346836090088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,0,1,float16,fp8,0,1.442346731821696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,0,1,fp8,fp8,0,1.324191967646281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,128,1,float16,fp8,0,0.5423146486282349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,0,1,float16,float16,0,1.4500853220621746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,128,1,fp8,fp8,0,0.4944693247477214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,128,1,float16,float16,0,0.3145973285039266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,128,1,float16,fp8,0,0.3216266632080078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,0,1,float16,float16,0,0.7912320295969645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,0,1,float16,fp8,0,1.4575360616048176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,0,1,fp8,fp8,0,1.3384265899658203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,128,1,fp8,fp8,0,0.29833600918451947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,128,1,float16,float16,0,0.2774133284886678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,0,1,float16,fp8,0,0.7991733551025391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,0,1,fp8,fp8,0,0.7365067005157471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,128,1,float16,fp8,0,0.2794719934463501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,0,1,float16,float16,0,0.7491146723429362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,128,1,fp8,fp8,0,0.25889599323272705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,128,1,float16,float16,0,0.2803200085957845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,0,1,float16,fp8,0,0.751967986424764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,0,1,fp8,fp8,0,0.6979040304819742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,0,1,float16,float16,0,0.754250685373942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,128,1,float16,fp8,0,0.2833706736564636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,128,1,fp8,fp8,0,0.26146133740743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,128,1,float16,float16,0,0.28384000062942505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,0,1,float16,fp8,0,0.7568906943003336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,0,1,fp8,fp8,0,0.6993227005004883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,0,1,float16,float16,0,0.758512020111084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,128,1,float16,fp8,0,0.2876266638437907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,128,1,fp8,fp8,0,0.2655679980913798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,128,1,float16,float16,0,0.28996266921361286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,0,1,float16,fp8,0,0.7602346738179525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,0,1,fp8,fp8,0,0.7035893599192301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,128,1,float16,fp8,0,0.29452266295750934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,0,1,float16,float16,0,0.7671466668446859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,128,1,fp8,fp8,0,0.2718026638031006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,128,1,float16,float16,0,0.17909866571426392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,0,1,float16,fp8,0,0.7691840330759684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,128,1,float16,fp8,0,0.18329066038131714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,0,1,fp8,fp8,0,0.710197369257609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,0,1,float16,float16,0,0.44042134284973145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,128,1,fp8,fp8,0,0.17265599966049194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,128,1,float16,float16,0,0.1585706671079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,0,1,fp8,fp8,0,0.41205334663391113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,0,1,float16,fp8,0,0.44509867827097577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,128,1,float16,fp8,0,0.15847466389338175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,0,1,float16,float16,0,0.41495466232299805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,128,1,fp8,fp8,0,0.14621866742769876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,0,1,float16,fp8,0,0.41678400834401447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,128,1,float16,float16,0,0.1606933375199636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,0,1,fp8,fp8,0,0.38395198186238605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,0,1,float16,float16,0,0.41652266184488934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,128,1,float16,fp8,0,0.15901866555213928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,128,1,fp8,fp8,0,0.1495733360449473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,0,1,float16,fp8,0,0.4185813268025716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,128,1,float16,float16,0,0.16031466921170553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,0,1,fp8,fp8,0,0.3868746757507324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,0,1,float16,float16,0,0.41948266824086505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,128,1,float16,fp8,0,0.16057599584261575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,128,1,fp8,fp8,0,0.1527359982331594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,128,1,float16,float16,0,0.16405866543451944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,0,1,float16,fp8,0,0.4187893470128377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,0,1,fp8,fp8,0,0.39055466651916504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,0,1,float16,float16,0,0.423744002978007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,128,1,float16,fp8,0,0.16658133268356323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,128,1,fp8,fp8,0,0.1586133340994517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,128,1,float16,float16,0,0.12573867042859396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,0,1,float16,fp8,0,0.42425068219502765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,0,1,fp8,fp8,0,0.3967306613922119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,0,1,float16,float16,0,0.2769920031229655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,128,1,float16,fp8,0,0.1257866621017456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,128,1,fp8,fp8,0,0.11949867010116577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,0,1,float16,fp8,0,0.2788640062014262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,128,1,float16,float16,0,0.12377066413561504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,0,1,fp8,fp8,0,0.25887467463811237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,128,1,float16,fp8,0,0.12388267119725545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,0,1,float16,float16,0,0.27769599358240765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,128,1,fp8,fp8,0,0.11758933464686076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,128,1,float16,float16,0,0.12357333302497864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,0,1,float16,fp8,0,0.27643734216690063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,0,1,fp8,fp8,0,0.25749866167704266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,128,1,float16,fp8,0,0.12560000022252402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,128,1,fp8,fp8,0,0.11773332953453064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,0,1,float16,float16,0,0.27737067143122357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,128,1,float16,float16,0,0.1244586706161499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,0,1,float16,fp8,0,0.27582399050394696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,0,1,fp8,fp8,0,0.2571199933687846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,128,1,float16,fp8,0,0.12365333239237468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,128,1,fp8,fp8,0,0.11804266770680745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,0,1,float16,float16,0,0.27566399176915485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,128,1,float16,float16,0,0.12391466895739238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,0,1,float16,fp8,0,0.2773653268814087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,0,1,fp8,fp8,0,0.25788267453511554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,128,1,float16,fp8,0,0.12552000085512796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,128,1,fp8,fp8,0,0.11750400066375732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,0,1,float16,float16,0,0.27720000346501666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,0,1,float16,fp8,0,0.2771786650021871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,0,1,fp8,fp8,0,0.25697600841522217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,128,1,float16,float16,0,1.3058826923370361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,128,1,float16,fp8,0,1.3166240056355794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,128,1,fp8,fp8,0,1.1811947027842205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,0,1,float16,float16,0,2.8015947341918945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,0,1,float16,fp8,0,2.8103199005126953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,128,1,float16,float16,0,1.3266719977060955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,0,1,fp8,fp8,0,2.564026673634847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,128,1,fp8,fp8,0,1.205733299255371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,128,1,float16,fp8,0,1.336527983347575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,0,1,float16,float16,0,2.8237813313802085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,128,1,float16,float16,0,1.339461326599121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,0,1,float16,fp8,0,2.835397402445475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,0,1,fp8,fp8,0,2.589301268259684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,0,1,float16,float16,0,2.8524001439412436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,128,1,float16,fp8,0,1.347599983215332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,128,1,fp8,fp8,0,1.2197439670562744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,128,1,float16,float16,0,1.3802879651387532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,0,1,float16,fp8,0,2.8475945790608725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,0,1,fp8,fp8,0,2.602735996246338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,128,1,float16,fp8,0,1.3886292775472004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,0,1,float16,float16,0,2.8893067042032876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,128,1,fp8,fp8,0,1.26583464940389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,128,1,float16,float16,0,0.7562346458435059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,128,1,float16,fp8,0,0.7696320215861002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,0,1,float16,float16,0,1.529952049255371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,0,1,float16,fp8,0,2.8907785415649414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,0,1,fp8,fp8,0,2.649237314860026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,128,1,fp8,fp8,0,0.7111466725667318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,128,1,float16,float16,0,0.6635306676228842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,0,1,fp8,fp8,0,1.4159092903137207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,0,1,float16,fp8,0,1.5431572596232097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,128,1,float16,fp8,0,0.6681439876556396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,0,1,float16,float16,0,1.4221067428588867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,128,1,fp8,fp8,0,0.603007992108663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,128,1,float16,float16,0,0.6688053607940674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,0,1,float16,fp8,0,1.430415948232015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,0,1,fp8,fp8,0,1.3037013212839763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,128,1,float16,fp8,0,0.67685866355896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,0,1,float16,float16,0,1.4292213122049968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,128,1,fp8,fp8,0,0.6098293463389078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,128,1,float16,float16,0,0.6753706932067871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,0,1,float16,fp8,0,1.4355146090189617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,0,1,fp8,fp8,0,1.3136533101399739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,0,1,float16,float16,0,1.4377652804056804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,128,1,float16,fp8,0,0.6815893650054932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,128,1,fp8,fp8,0,0.6174986759821574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,128,1,float16,float16,0,0.6910826365152994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,0,1,float16,fp8,0,1.4436906178792317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,0,1,fp8,fp8,0,1.3194506963094075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,0,1,float16,float16,0,1.4545386632283528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,128,1,float16,fp8,0,0.6993707021077474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,128,1,fp8,fp8,0,0.6359093189239502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,128,1,float16,float16,0,0.395957350730896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,0,1,float16,float16,0,0.7915626366933187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,0,1,float16,fp8,0,1.4628532727559407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,128,1,float16,fp8,0,0.4044640064239502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,0,1,fp8,fp8,0,1.3380853335062664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,128,1,fp8,fp8,0,0.37273601690928143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,128,1,float16,float16,0,0.3487199942270915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,0,1,fp8,fp8,0,0.7373973528544108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,0,1,float16,fp8,0,0.8005812962849935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,128,1,float16,fp8,0,0.351034681002299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,0,1,float16,float16,0,0.7402826944986979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,128,1,fp8,fp8,0,0.32125333944956463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,0,1,float16,fp8,0,0.7415200074513754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,128,1,float16,float16,0,0.3511039813359578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,0,1,fp8,fp8,0,0.6835839748382568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,0,1,float16,float16,0,0.7431306838989258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,128,1,float16,fp8,0,0.3551040093104045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,128,1,fp8,fp8,0,0.32476266225179035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,128,1,float16,float16,0,0.3563786745071411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,0,1,float16,fp8,0,0.7470506827036539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,0,1,fp8,fp8,0,0.6857173442840576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,0,1,float16,float16,0,0.7483519713083903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,128,1,float16,fp8,0,0.3596746524175008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,128,1,fp8,fp8,0,0.32846933603286743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,128,1,float16,float16,0,0.36444799105326336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,0,1,float16,fp8,0,0.7507946491241455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,0,1,fp8,fp8,0,0.6914133230845133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,128,1,float16,fp8,0,0.36762134234110516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,128,1,fp8,fp8,0,0.3370986779530843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,0,1,float16,float16,0,0.7573386828104655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,128,1,float16,float16,0,0.21585599581400552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,0,1,float16,fp8,0,0.7612480322519938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,0,1,fp8,fp8,0,0.69923202196757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,128,1,float16,fp8,0,0.22055466969807944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,0,1,float16,float16,0,0.42721601327260333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,128,1,fp8,fp8,0,0.20788800716400146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,128,1,float16,float16,0,0.18889600038528442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,0,1,fp8,fp8,0,0.39857598145802814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,0,1,float16,fp8,0,0.43189334869384766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,128,1,fp8,fp8,0,0.1771679917971293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,128,1,float16,fp8,0,0.18988800048828125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,0,1,float16,float16,0,0.39403200149536133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,128,1,float16,float16,0,0.18944533665974936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,0,1,fp8,fp8,0,0.3703306516011556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,0,1,float16,fp8,0,0.3970453341801961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,128,1,float16,fp8,0,0.19236799081166586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,128,1,fp8,fp8,0,0.17890133460362753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,0,1,float16,float16,0,0.39581867059071857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,128,1,float16,float16,0,0.19162132342656454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,0,1,fp8,fp8,0,0.37170132001241046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,0,1,float16,fp8,0,0.39964266618092853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,128,1,float16,fp8,0,0.19539733727773032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,128,1,fp8,fp8,0,0.18210667371749878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,0,1,float16,float16,0,0.3996586799621582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,128,1,float16,float16,0,0.19954133033752441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,0,1,float16,fp8,0,0.4039200146993001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,0,1,fp8,fp8,0,0.37373868624369305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,128,1,float16,fp8,0,0.2019253373146057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,128,1,fp8,fp8,0,0.1873226761817932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,0,1,float16,float16,0,0.40695468584696454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,128,1,float16,float16,0,0.1258133351802826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,0,1,float16,fp8,0,0.41228266557057697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,0,1,fp8,fp8,0,0.37990399201711017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,0,1,float16,float16,0,0.24378132820129395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,128,1,float16,fp8,0,0.12946133812268576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,128,1,fp8,fp8,0,0.12247999509175618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,0,1,float16,fp8,0,0.24714134136835733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,128,1,float16,float16,0,0.11361066500345866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,0,1,fp8,fp8,0,0.23142399390538534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,0,1,float16,float16,0,0.22943466901779175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,128,1,float16,fp8,0,0.11375466982523601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,128,1,fp8,fp8,0,0.10364799698193868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,0,1,float16,fp8,0,0.22892266511917114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,0,1,fp8,fp8,0,0.21072532733281454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,128,1,float16,float16,0,0.11319999893506368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,0,1,float16,float16,0,0.2296853264172872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,128,1,fp8,fp8,0,0.10526399811108907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,128,1,float16,fp8,0,0.11307199796040852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,0,1,float16,fp8,0,0.23059199253718057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,0,1,fp8,fp8,0,0.21016534169514975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,128,1,float16,float16,0,0.11342933773994446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,128,1,float16,fp8,0,0.11441066861152649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,0,1,float16,float16,0,0.23094399770100912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,128,1,fp8,fp8,0,0.10522133111953735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,0,1,float16,fp8,0,0.2336533268292745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,0,1,fp8,fp8,0,0.21386132637659708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,128,1,float16,float16,0,0.11377066373825073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,0,1,float16,float16,0,0.23188799619674683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,128,1,float16,fp8,0,0.11754666765530904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,128,1,fp8,fp8,0,0.10964799920717876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,0,1,float16,fp8,0,0.23397332429885864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,128,1,float16,float16,0,0.08933867017428081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,0,1,fp8,fp8,0,0.21825599670410156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,0,1,float16,float16,0,0.16275733709335327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,128,1,float16,fp8,0,0.0906880001227061
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,128,1,fp8,fp8,0,0.08479467034339905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,0,1,float16,fp8,0,0.16180266936620077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,0,1,fp8,fp8,0,0.15040533741315207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,128,1,float16,float16,0,0.08894933263460796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,0,1,float16,float16,0,0.16300266981124878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,128,1,float16,fp8,0,0.0890933374563853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,128,1,fp8,fp8,0,0.08468266328175862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,0,1,float16,fp8,0,0.16125333309173584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,0,1,fp8,fp8,0,0.15050133069356283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,128,1,float16,float16,0,0.08906666437784831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,128,1,float16,fp8,0,0.09082133571306865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,0,1,float16,float16,0,0.16342932979265848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,128,1,fp8,fp8,0,0.08462400237719218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,0,1,float16,fp8,0,0.16178133090337118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,128,1,float16,float16,0,0.08902933200200398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,0,1,fp8,fp8,0,0.15060266852378845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,0,1,float16,float16,0,0.16194666425387064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,128,1,float16,fp8,0,0.09103467067082723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,128,1,fp8,fp8,0,0.08498133222262065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,0,1,float16,fp8,0,0.16289066274960837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,0,1,fp8,fp8,0,0.15004266301790872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,128,1,float16,float16,0,0.09087466200192769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,0,1,float16,float16,0,0.16290133198102316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,128,1,float16,fp8,0,0.09098133444786072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,128,1,fp8,fp8,0,0.08453866839408875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,0,1,float16,fp8,0,0.16335466504096985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,0,1,fp8,fp8,0,0.15053866306940714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,128,1,float16,float16,0,0.9780960083007812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,128,1,float16,fp8,0,0.9871253172556559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,0,1,float16,float16,0,1.7760532697041829
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,128,1,fp8,fp8,0,0.8829546769460043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,0,1,float16,fp8,0,1.7861439387003581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,128,1,float16,float16,0,0.9889973004659017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,0,1,fp8,fp8,0,1.6209707260131836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,128,1,float16,fp8,0,0.995141347249349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,128,1,fp8,fp8,0,0.8976426919301351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,0,1,float16,float16,0,1.788981278737386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,128,1,float16,float16,0,0.9986026287078857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,0,1,float16,fp8,0,1.7951199213663738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,0,1,fp8,fp8,0,1.634943962097168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,128,1,float16,fp8,0,1.0054720242818196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,128,1,fp8,fp8,0,0.908186674118042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,0,1,float16,float16,0,1.8012852668762207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,0,1,float16,fp8,0,1.8074080149332683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,128,1,float16,float16,0,1.0233973662058513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,0,1,fp8,fp8,0,1.647450606028239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,128,1,fp8,fp8,0,0.9405439694722494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,128,1,float16,fp8,0,1.032970666885376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,0,1,float16,float16,0,1.8277440071105957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,128,1,float16,float16,0,0.570682684580485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,0,1,float16,fp8,0,1.836282730102539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,0,1,float16,float16,0,0.9856586456298828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,128,1,float16,fp8,0,0.5814880132675171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,0,1,fp8,fp8,0,1.6778666178385417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,128,1,fp8,fp8,0,0.5356320142745972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,0,1,float16,fp8,0,0.9968427022298177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,128,1,float16,float16,0,0.5007786750793457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,0,1,fp8,fp8,0,0.9145653247833252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,0,1,float16,float16,0,0.9089333216349283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,128,1,fp8,fp8,0,0.45741868019104004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,128,1,float16,fp8,0,0.5054879983266195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,0,1,float16,fp8,0,0.9143893718719482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,128,1,float16,float16,0,0.5080320040384928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,0,1,fp8,fp8,0,0.8342026869455973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,0,1,float16,float16,0,0.9165493647257487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,128,1,float16,fp8,0,0.5101279815038046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,128,1,fp8,fp8,0,0.46302398045857746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,0,1,float16,fp8,0,0.9196533362070719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,128,1,float16,float16,0,0.5113386710484823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,0,1,fp8,fp8,0,0.840399980545044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,0,1,float16,float16,0,0.9199679692586263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,128,1,float16,fp8,0,0.5170240004857382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,128,1,fp8,fp8,0,0.4681280056635539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,0,1,float16,fp8,0,0.926042636235555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,128,1,float16,float16,0,0.5219626824061075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,0,1,fp8,fp8,0,0.8442773024241129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,0,1,float16,float16,0,0.9337653319040934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,128,1,float16,fp8,0,0.5288426478703817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,128,1,fp8,fp8,0,0.4798666636149089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,128,1,float16,float16,0,0.3021013339360555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,0,1,float16,fp8,0,0.9393440087636312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,0,1,float16,float16,0,0.5156160195668539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,0,1,fp8,fp8,0,0.8573813438415527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,128,1,float16,fp8,0,0.3077813386917114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,128,1,fp8,fp8,0,0.28546667098999023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,0,1,float16,fp8,0,0.5235786835352579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,0,1,fp8,fp8,0,0.4819893439610799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,128,1,float16,float16,0,0.2641706665356954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,0,1,float16,float16,0,0.4758400122324626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,128,1,fp8,fp8,0,0.24494399627049765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,128,1,float16,fp8,0,0.2655679980913798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,0,1,float16,fp8,0,0.4798293511072795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,128,1,float16,float16,0,0.2654719948768616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,0,1,fp8,fp8,0,0.4429973363876343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,0,1,float16,float16,0,0.47864000002543133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,128,1,float16,fp8,0,0.2672746578852336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,128,1,fp8,fp8,0,0.2479626735051473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,128,1,float16,float16,0,0.2702346642812093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,0,1,float16,fp8,0,0.4809386730194092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,0,1,fp8,fp8,0,0.4447253147761027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,0,1,float16,float16,0,0.4838133255640666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,128,1,float16,fp8,0,0.2740853428840637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,128,1,fp8,fp8,0,0.252400000890096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,0,1,float16,fp8,0,0.48545066515604657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,128,1,float16,float16,0,0.2776906689008077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,0,1,fp8,fp8,0,0.44750932852427167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,0,1,float16,float16,0,0.49109331766764325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,128,1,float16,fp8,0,0.2801706592241923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,128,1,fp8,fp8,0,0.2579893271128337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,128,1,float16,float16,0,0.16478400429089865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,0,1,float16,fp8,0,0.49377600351969403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,0,1,fp8,fp8,0,0.4546560049057007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,0,1,float16,float16,0,0.28285332520802814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,128,1,float16,fp8,0,0.16741333405176798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,128,1,fp8,fp8,0,0.15865066647529602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,128,1,float16,float16,0,0.14076800147692362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,0,1,fp8,fp8,0,0.2653119961420695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,0,1,float16,fp8,0,0.2869653304417928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,0,1,float16,float16,0,0.2590026656786601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,128,1,float16,fp8,0,0.1425920029481252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,128,1,fp8,fp8,0,0.13209600249926248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,0,1,float16,fp8,0,0.25982399781545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,0,1,fp8,fp8,0,0.2397600015004476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,128,1,float16,float16,0,0.14236266414324442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,0,1,float16,float16,0,0.26024534304936725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,128,1,float16,fp8,0,0.14435199896494547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,128,1,fp8,fp8,0,0.13410133123397827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,0,1,float16,fp8,0,0.2592746615409851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,0,1,fp8,fp8,0,0.240831991036733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,128,1,float16,float16,0,0.14426666498184204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,0,1,float16,float16,0,0.2609440088272095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,128,1,float16,fp8,0,0.14612266421318054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,128,1,fp8,fp8,0,0.14004266262054443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,128,1,float16,float16,0,0.14890133341153464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,0,1,float16,fp8,0,0.2630880077679952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,0,1,fp8,fp8,0,0.24528533220291138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,128,1,float16,fp8,0,0.152319997549057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,128,1,fp8,fp8,0,0.14451199769973755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,0,1,float16,float16,0,0.26614399751027423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,128,1,float16,float16,0,0.09321066737174988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,0,1,float16,fp8,0,0.2691520055135091
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,0,1,fp8,fp8,0,0.25220266977945965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,0,1,float16,float16,0,0.16365866859753928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,128,1,float16,fp8,0,0.09512000282605489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,128,1,fp8,fp8,0,0.09490666786829631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,0,1,float16,fp8,0,0.16713599363962808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,128,1,float16,float16,0,0.08630933364232381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,0,1,fp8,fp8,0,0.15660799543062845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,128,1,float16,fp8,0,0.08679466446240743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,128,1,fp8,fp8,0,0.07875200112660725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,0,1,float16,float16,0,0.15563199917475382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,0,1,float16,fp8,0,0.15451199809710184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,128,1,float16,float16,0,0.08504000306129456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,0,1,fp8,fp8,0,0.14220800002415976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,128,1,float16,fp8,0,0.08496532837549846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,128,1,fp8,fp8,0,0.08075200021266937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,0,1,float16,float16,0,0.15465066830317178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,128,1,float16,float16,0,0.084906667470932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,0,1,fp8,fp8,0,0.14446933070818582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,0,1,float16,fp8,0,0.15641599893569946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,128,1,fp8,fp8,0,0.0809440016746521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,128,1,float16,fp8,0,0.0869760016600291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,0,1,float16,float16,0,0.154858668645223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,128,1,float16,float16,0,0.08694932858149211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,0,1,float16,fp8,0,0.15879467129707336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,0,1,fp8,fp8,0,0.14409066239992777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,128,1,float16,fp8,0,0.08877866466840108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,128,1,fp8,fp8,0,0.0823413332303365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,0,1,float16,float16,0,0.15682133038838705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,128,1,float16,float16,0,0.06868800024191539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,0,1,float16,fp8,0,0.15780267119407654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,0,1,fp8,fp8,0,0.14553067088127136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,0,1,float16,float16,0,0.11568533380826314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,128,1,float16,fp8,0,0.07027199864387512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,128,1,fp8,fp8,0,0.06612266600131989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,128,1,float16,float16,0,0.06846933563550313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,0,1,fp8,fp8,0,0.10923733313878377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,0,1,float16,fp8,0,0.11588266491889954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,0,1,float16,float16,0,0.11563199758529663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,128,1,float16,fp8,0,0.06859200199445088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,128,1,fp8,fp8,0,0.06654400130112965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,128,1,float16,float16,0,0.06962133447329204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,0,1,float16,fp8,0,0.11635200182596843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,0,1,fp8,fp8,0,0.10734400153160095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,128,1,fp8,fp8,0,0.06604800124963124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,128,1,float16,fp8,0,0.0701386680205663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,0,1,float16,float16,0,0.11586133639017741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,0,1,float16,fp8,0,0.11539199948310852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,128,1,float16,float16,0,0.06843733290831248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,0,1,fp8,fp8,0,0.10820266604423523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,128,1,float16,fp8,0,0.06843199829260509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,128,1,fp8,fp8,0,0.066170667608579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,0,1,float16,float16,0,0.1163093348344167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,128,1,float16,float16,0,0.07002666592597961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,0,1,float16,fp8,0,0.1165066659450531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,0,1,fp8,fp8,0,0.10919466614723206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,128,1,float16,fp8,0,0.06818133095900218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,128,1,fp8,fp8,0,0.06644266843795776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,0,1,float16,float16,0,0.11593066652615865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,0,1,float16,fp8,0,0.11723732948303223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,0,1,fp8,fp8,0,0.10742933551470439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,128,1,float16,float16,0,1.324671983718872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,128,1,fp8,fp8,0,1.1823946634928386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,128,1,float16,fp8,0,1.3277546564737956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,0,1,float16,float16,0,1.9531359672546387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,128,1,float16,float16,0,1.3551840782165527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,0,1,fp8,fp8,0,1.758570671081543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,0,1,float16,fp8,0,1.9522825876871746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,0,1,float16,float16,0,1.9790666898091633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,128,1,fp8,fp8,0,1.1994880040486653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,128,1,float16,fp8,0,1.354042689005534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,0,1,float16,fp8,0,1.9774293899536133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,0,1,fp8,fp8,0,1.7753012975056965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,128,1,float16,float16,0,1.372437318166097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,0,1,float16,float16,0,2.005674680074056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,128,1,float16,fp8,0,1.368010679880778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,128,1,fp8,fp8,0,1.2103839715321858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,0,1,float16,fp8,0,1.9946932792663574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,128,1,float16,float16,0,1.4019254048665364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,0,1,fp8,fp8,0,1.7874186833699544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,0,1,float16,float16,0,2.033573309580485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,128,1,fp8,fp8,0,1.2560853163401287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,128,1,float16,fp8,0,1.3960693677266438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,0,1,float16,fp8,0,2.031520048777262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,128,1,float16,float16,0,0.7565813064575195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,0,1,float16,float16,0,1.084282636642456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,0,1,fp8,fp8,0,1.8333013852437336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,128,1,float16,fp8,0,0.7648373444875082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,128,1,fp8,fp8,0,0.7072106997172037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,0,1,fp8,fp8,0,1.002410650253296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,0,1,float16,fp8,0,1.0913386344909668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,128,1,float16,float16,0,0.6577279965082804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,128,1,float16,fp8,0,0.6638880173365275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,128,1,fp8,fp8,0,0.5935573180516561
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,0,1,float16,float16,0,0.9764426549275717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,128,1,float16,float16,0,0.6668586730957031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,0,1,float16,fp8,0,0.9828373591105143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,0,1,fp8,fp8,0,0.8866186936696371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,128,1,fp8,fp8,0,0.6033173402150472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,128,1,float16,fp8,0,0.6719253063201904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,0,1,float16,float16,0,0.9869013627370199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,0,1,fp8,fp8,0,0.8967200120290121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,128,1,float16,float16,0,0.6729546387990316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,0,1,float16,fp8,0,0.98963729540507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,128,1,fp8,fp8,0,0.6111786762873331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,128,1,float16,fp8,0,0.6782080332438151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,0,1,float16,float16,0,0.9904053211212158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,0,1,fp8,fp8,0,0.9048319657643636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,0,1,float16,fp8,0,0.9964799880981445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,128,1,float16,float16,0,0.6909013589223226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,0,1,float16,float16,0,1.0098559856414795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,128,1,fp8,fp8,0,0.6316320101420084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,128,1,float16,fp8,0,0.6956853071848551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,0,1,float16,fp8,0,1.0177280108133953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,128,1,float16,float16,0,0.38794132073720294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,0,1,float16,float16,0,0.5563999811808268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,0,1,fp8,fp8,0,0.9250400066375732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,128,1,float16,fp8,0,0.39553598562876385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,128,1,fp8,fp8,0,0.36529068152109784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,128,1,float16,float16,0,0.33931732177734375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,0,1,float16,fp8,0,0.5644640127817789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,0,1,fp8,fp8,0,0.5184053182601929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,0,1,float16,float16,0,0.5027466615041097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,128,1,fp8,fp8,0,0.3118506669998169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,128,1,float16,fp8,0,0.34302934010823566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,0,1,float16,fp8,0,0.5058720111846924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,0,1,fp8,fp8,0,0.46255465348561603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,128,1,float16,float16,0,0.34350399176279706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,0,1,float16,float16,0,0.5061866839726766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,128,1,float16,fp8,0,0.3459999958674113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,128,1,fp8,fp8,0,0.3162613312403361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,0,1,float16,fp8,0,0.5109119812647501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,0,1,fp8,fp8,0,0.46805866559346515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,128,1,float16,float16,0,0.34779198964436847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,0,1,float16,float16,0,0.5122880140940348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,128,1,float16,fp8,0,0.3529493411382039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,128,1,fp8,fp8,0,0.3200213313102722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,0,1,float16,fp8,0,0.5156693458557129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,128,1,float16,float16,0,0.35763732592264813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,0,1,fp8,fp8,0,0.47062400976816815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,0,1,float16,float16,0,0.5226293404897054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,128,1,float16,fp8,0,0.36135466893513996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,128,1,fp8,fp8,0,0.32843200365702313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,0,1,float16,fp8,0,0.5248213211695353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,128,1,float16,float16,0,0.2076479991277059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,0,1,fp8,fp8,0,0.48117868105570477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,0,1,float16,float16,0,0.29575467109680176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,128,1,float16,fp8,0,0.21247466405232748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,128,1,fp8,fp8,0,0.1977333426475525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,0,1,float16,fp8,0,0.3004746635754903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,0,1,fp8,fp8,0,0.2795093258221944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,128,1,float16,float16,0,0.17739200592041016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,0,1,float16,float16,0,0.26443199316660565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,128,1,float16,fp8,0,0.17959467569986978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,128,1,fp8,fp8,0,0.16857065757115683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,0,1,float16,fp8,0,0.26652799050013226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,128,1,float16,float16,0,0.1791093349456787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,0,1,fp8,fp8,0,0.24955199162165323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,0,1,float16,float16,0,0.26497066020965576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,128,1,float16,fp8,0,0.18147200345993042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,128,1,fp8,fp8,0,0.16879467169443765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,0,1,float16,fp8,0,0.269322673479716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,128,1,float16,float16,0,0.18333866198857626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,0,1,fp8,fp8,0,0.25084267059961957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,0,1,float16,float16,0,0.2696160078048706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,128,1,float16,fp8,0,0.1851253310839335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,128,1,fp8,fp8,0,0.17195733388264975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,0,1,float16,fp8,0,0.2712533275286357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,0,1,fp8,fp8,0,0.25433599948883057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,128,1,float16,float16,0,0.18965866168340048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,0,1,float16,float16,0,0.2754506667455037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,128,1,float16,fp8,0,0.19284266233444214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,128,1,fp8,fp8,0,0.17753066619237265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,128,1,float16,float16,0,0.11624000469843547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,0,1,float16,fp8,0,0.28017600377400714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,0,1,fp8,fp8,0,0.25893867015838623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,0,1,float16,float16,0,0.16396266222000122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,128,1,float16,fp8,0,0.11950400471687317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,128,1,fp8,fp8,0,0.11379200220108032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,0,1,float16,fp8,0,0.16875733931859335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,0,1,fp8,fp8,0,0.15896000464757284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,128,1,float16,float16,0,0.1013759970664978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,0,1,float16,float16,0,0.14898133277893066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,128,1,float16,fp8,0,0.10327999790509541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,128,1,fp8,fp8,0,0.09318932890892029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,0,1,float16,fp8,0,0.1516213317712148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,0,1,fp8,fp8,0,0.13821867108345032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,128,1,float16,float16,0,0.10180266698201497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,0,1,float16,float16,0,0.15103999773661295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,128,1,float16,fp8,0,0.10332266489664714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,128,1,fp8,fp8,0,0.09292266766230266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,0,1,float16,fp8,0,0.15156267086664835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,0,1,fp8,fp8,0,0.1381706694761912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,128,1,float16,float16,0,0.1013759970664978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,0,1,float16,float16,0,0.15175466736157736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,128,1,fp8,fp8,0,0.09317333499590556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,128,1,float16,fp8,0,0.10469333330790202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,128,1,float16,float16,0,0.10513066252072652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,0,1,float16,fp8,0,0.1530933380126953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,0,1,fp8,fp8,0,0.13992533087730408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,128,1,float16,fp8,0,0.10729599992434184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,128,1,fp8,fp8,0,0.10124799609184265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,0,1,float16,float16,0,0.15441600481669107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,128,1,float16,float16,0,0.06862933437029521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,0,1,float16,fp8,0,0.1550986667474111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,0,1,fp8,fp8,0,0.14621866742769876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,0,1,float16,float16,0,0.09860799709955852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,128,1,float16,fp8,0,0.06839466591676076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,128,1,fp8,fp8,0,0.06610666712125142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,0,1,float16,fp8,0,0.10110400120417277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,0,1,fp8,fp8,0,0.09416533509890239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,128,1,float16,float16,0,0.063509335120519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,128,1,fp8,fp8,0,0.06017066538333893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,128,1,float16,fp8,0,0.06362133224805196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,0,1,float16,float16,0,0.09619733691215515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,0,1,float16,fp8,0,0.09505066275596619
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,128,1,float16,float16,0,0.0645066648721695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,0,1,fp8,fp8,0,0.08884800473848979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,128,1,float16,fp8,0,0.06408533453941345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,128,1,fp8,fp8,0,0.060453335444132485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,0,1,float16,float16,0,0.09533333778381348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,0,1,float16,fp8,0,0.09609066446622212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,0,1,fp8,fp8,0,0.08904533584912618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,128,1,float16,float16,0,0.064560001095136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,0,1,float16,float16,0,0.09517332911491394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,128,1,float16,fp8,0,0.06458666423956554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,128,1,fp8,fp8,0,0.06037333110968272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,128,1,float16,float16,0,0.06418666740258534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,0,1,float16,fp8,0,0.09519466757774353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,0,1,fp8,fp8,0,0.08890666564305623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,128,1,float16,fp8,0,0.0650079995393753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,128,1,fp8,fp8,0,0.06244266529877981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,0,1,float16,float16,0,0.09734933574994405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,128,1,float16,float16,0,0.05625600119431814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,0,1,float16,fp8,0,0.0972106655438741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,0,1,fp8,fp8,0,0.09101866682370503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,128,1,float16,fp8,0,0.053861334919929504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,128,1,fp8,fp8,0,0.05209066470464071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,0,1,float16,float16,0,0.07606400052706401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,0,1,float16,fp8,0,0.07653866708278656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,0,1,fp8,fp8,0,0.07042666773001353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,128,1,float16,float16,0,0.05589333176612854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,128,1,float16,fp8,0,0.05402666827042898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,0,1,float16,float16,0,0.07606400052706401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,128,1,fp8,fp8,0,0.0521066685517629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,0,1,float16,fp8,0,0.07527466615041097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,128,1,float16,float16,0,0.054192001620928444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,0,1,fp8,fp8,0,0.0701333334048589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,128,1,float16,fp8,0,0.05416533350944519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,128,1,fp8,fp8,0,0.05179733534653982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,0,1,float16,float16,0,0.07615466912587483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,128,1,float16,float16,0,0.055914665261904396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,0,1,fp8,fp8,0,0.07048533360163371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,0,1,float16,fp8,0,0.07501866420110066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,128,1,float16,fp8,0,0.05416533350944519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,0,1,float16,float16,0,0.07660800218582153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,128,1,fp8,fp8,0,0.05235733091831207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,0,1,float16,fp8,0,0.07515733440717061
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,128,1,float16,float16,0,0.05596800148487091
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,0,1,fp8,fp8,0,0.07050666709740956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,0,1,float16,float16,0,0.07640533149242401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,128,1,fp8,fp8,0,0.0524533341328303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,128,1,float16,fp8,0,0.056015998125076294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,0,1,float16,fp8,0,0.07640000184377034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,0,1,fp8,fp8,0,0.07046400010585785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,128,1,float16,float16,0,0.9717386563618978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,128,1,fp8,fp8,0,0.87608536084493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,128,1,float16,fp8,0,0.9790453116099039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,0,1,float16,float16,0,1.284058650334676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,128,1,float16,float16,0,0.9901386896769205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,0,1,float16,fp8,0,1.2870399951934814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,0,1,fp8,fp8,0,1.1602773666381836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,0,1,float16,float16,0,1.297210693359375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,128,1,fp8,fp8,0,0.8882986704508463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,128,1,float16,fp8,0,0.9937600294748942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,0,1,float16,fp8,0,1.3015626271565754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,0,1,fp8,fp8,0,1.1727360089619954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,128,1,float16,float16,0,1.0002240339914958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,0,1,float16,float16,0,1.3148372968037922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,128,1,float16,fp8,0,1.0048106511433919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,128,1,fp8,fp8,0,0.8987627029418945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,0,1,float16,fp8,0,1.314079999923706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,128,1,float16,float16,0,1.01909335454305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,0,1,fp8,fp8,0,1.1847413380940754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,0,1,float16,float16,0,1.3345173199971516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,128,1,float16,fp8,0,1.0248479843139648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,128,1,fp8,fp8,0,0.9256213506062826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,0,1,float16,fp8,0,1.3399252891540527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,128,1,float16,float16,0,0.5660106738408407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,0,1,fp8,fp8,0,1.2104533513387044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,0,1,float16,float16,0,0.7314453125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,128,1,float16,fp8,0,0.5758613348007202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,128,1,fp8,fp8,0,0.5321919918060303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,0,1,float16,fp8,0,0.74180801709493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,0,1,fp8,fp8,0,0.680560032526652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,128,1,float16,float16,0,0.4941226641337077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,0,1,float16,float16,0,0.6539253393809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,128,1,float16,fp8,0,0.49853865305582684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,128,1,fp8,fp8,0,0.44946134090423584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,0,1,fp8,fp8,0,0.5950719912846884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,0,1,float16,fp8,0,0.6574560006459554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,128,1,float16,float16,0,0.4998133182525635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,128,1,float16,fp8,0,0.5045333305994669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,128,1,fp8,fp8,0,0.45573333899180096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,0,1,float16,float16,0,0.6587040026982626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,128,1,float16,float16,0,0.5052800178527832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,0,1,float16,fp8,0,0.6656266848246256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,0,1,fp8,fp8,0,0.6027199824651083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,128,1,fp8,fp8,0,0.46188799540201825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,128,1,float16,fp8,0,0.5123413403828939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,0,1,float16,float16,0,0.6639626820882162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,128,1,float16,float16,0,0.5179093281428019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,0,1,fp8,fp8,0,0.6074986855189005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,0,1,float16,fp8,0,0.6715306440989176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,0,1,float16,float16,0,0.6761066913604736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,128,1,float16,fp8,0,0.5245866775512695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,128,1,fp8,fp8,0,0.4747626781463623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,0,1,fp8,fp8,0,0.6192586819330851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,0,1,float16,fp8,0,0.6837440331776937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,128,1,float16,float16,0,0.29498666524887085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,0,1,float16,float16,0,0.3800693353017171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,128,1,float16,fp8,0,0.3013919989267985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,128,1,fp8,fp8,0,0.27896533409754437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,0,1,float16,fp8,0,0.3879520098368327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,0,1,fp8,fp8,0,0.3551946481068929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,128,1,float16,float16,0,0.25706666707992554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,0,1,float16,float16,0,0.3381066719690959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,128,1,float16,fp8,0,0.2590240041414897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,128,1,fp8,fp8,0,0.23626132806142172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,0,1,float16,fp8,0,0.34003734588623047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,128,1,float16,float16,0,0.2584693431854248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,0,1,fp8,fp8,0,0.31217066446940106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,0,1,float16,float16,0,0.33905601501464844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,128,1,float16,fp8,0,0.2600746750831604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,128,1,fp8,fp8,0,0.24155199527740479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,0,1,float16,fp8,0,0.34405334790547687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,0,1,fp8,fp8,0,0.3163786729176839
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,128,1,float16,float16,0,0.2630773385365804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,0,1,float16,float16,0,0.3476266860961914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,128,1,float16,fp8,0,0.2658986647923787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,128,1,fp8,fp8,0,0.24445867538452148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,0,1,float16,fp8,0,0.3496053218841553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,0,1,fp8,fp8,0,0.3211146593093872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,128,1,float16,float16,0,0.2698720097541809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,0,1,float16,float16,0,0.3537600040435791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,128,1,float16,fp8,0,0.273418664932251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,128,1,fp8,fp8,0,0.25075199206670123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,0,1,float16,fp8,0,0.35736000537872314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,0,1,fp8,fp8,0,0.3269493381182353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,128,1,float16,float16,0,0.15877866744995117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,0,1,float16,float16,0,0.20629332462946573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,128,1,float16,fp8,0,0.16250133514404297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,128,1,fp8,fp8,0,0.15250666936238608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,0,1,float16,fp8,0,0.20918933550516763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,0,1,fp8,fp8,0,0.19382399320602417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,128,1,float16,float16,0,0.13422933220863342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,0,1,float16,float16,0,0.17802133162816366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,128,1,float16,fp8,0,0.13449600338935852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,128,1,fp8,fp8,0,0.1243893305460612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,0,1,float16,fp8,0,0.1820853352546692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,0,1,fp8,fp8,0,0.1665600041548411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,128,1,float16,float16,0,0.13422399759292603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,0,1,float16,float16,0,0.18231467405954996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,128,1,float16,fp8,0,0.1358560025691986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,128,1,fp8,fp8,0,0.12796800335248312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,0,1,float16,fp8,0,0.1816800038019816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,0,1,fp8,fp8,0,0.1700800061225891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,128,1,float16,float16,0,0.1362986663977305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,0,1,float16,float16,0,0.18209065993626913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,128,1,float16,fp8,0,0.13966932892799377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,128,1,fp8,fp8,0,0.1318666636943817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,0,1,float16,fp8,0,0.18549333016077676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,0,1,fp8,fp8,0,0.1729173262914022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,128,1,float16,float16,0,0.14285866419474283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,0,1,float16,float16,0,0.18992000818252563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,128,1,float16,fp8,0,0.14477333426475525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,128,1,fp8,fp8,0,0.13741333285967508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,0,1,float16,fp8,0,0.1918026606241862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,128,1,float16,float16,0,0.08622933427492778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,0,1,fp8,fp8,0,0.18075199921925864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,0,1,float16,float16,0,0.11371200283368428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,128,1,float16,fp8,0,0.08887466788291931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,128,1,fp8,fp8,0,0.0878613293170929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,0,1,float16,fp8,0,0.1167680025100708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,0,1,fp8,fp8,0,0.11370666821797688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,128,1,float16,float16,0,0.0780266672372818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,0,1,float16,float16,0,0.10567466417948405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,128,1,float16,fp8,0,0.07881066699822743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,128,1,fp8,fp8,0,0.07221333185831706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,0,1,float16,fp8,0,0.10640533765157063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,0,1,fp8,fp8,0,0.09798933068911235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,128,1,float16,float16,0,0.07841599980990092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,0,1,float16,float16,0,0.1060693363348643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,128,1,float16,fp8,0,0.07876266539096832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,128,1,fp8,fp8,0,0.07276266813278198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,0,1,float16,fp8,0,0.1053706705570221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,0,1,fp8,fp8,0,0.09713600079218547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,128,1,float16,float16,0,0.07870933413505554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,0,1,float16,float16,0,0.1055519978205363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,128,1,float16,fp8,0,0.0794293334086736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,128,1,fp8,fp8,0,0.07272533575693767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,0,1,float16,fp8,0,0.10761066277821858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,128,1,float16,float16,0,0.08054399987061818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,0,1,fp8,fp8,0,0.09877866506576538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,128,1,float16,fp8,0,0.08106666803359985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,128,1,fp8,fp8,0,0.07659733295440674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,0,1,float16,float16,0,0.1076693336168925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,128,1,float16,float16,0,0.0518453319867452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,0,1,float16,fp8,0,0.10946133732795715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,0,1,fp8,fp8,0,0.10140267014503479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,0,1,float16,float16,0,0.07241599758466084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,128,1,float16,fp8,0,0.054042667150497437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,128,1,fp8,fp8,0,0.05227733155091604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,0,1,float16,fp8,0,0.07401066521803538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,0,1,fp8,fp8,0,0.07062399884064992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,128,1,float16,float16,0,0.05008533100287119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,0,1,float16,float16,0,0.07069333394368489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,128,1,float16,fp8,0,0.05064533154169718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,128,1,fp8,fp8,0,0.045978665351867676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,0,1,float16,fp8,0,0.07048533360163371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,0,1,fp8,fp8,0,0.06411199768384297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,128,1,float16,float16,0,0.04966400067011515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,128,1,float16,fp8,0,0.05009600023428599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,0,1,float16,float16,0,0.0701333334048589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,128,1,fp8,fp8,0,0.04833066463470459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,0,1,float16,fp8,0,0.07030933101971944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,128,1,float16,float16,0,0.05003733436266581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,0,1,fp8,fp8,0,0.06639466683069865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,128,1,float16,fp8,0,0.050245334704717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,128,1,fp8,fp8,0,0.047968000173568726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,0,1,float16,float16,0,0.06997333467006683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,0,1,float16,fp8,0,0.07050133248170216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,128,1,float16,float16,0,0.05190399785836538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,0,1,fp8,fp8,0,0.06611733138561249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,128,1,float16,fp8,0,0.05195199946562449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,0,1,float16,float16,0,0.0707946668068568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,128,1,fp8,fp8,0,0.0476746658484141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,128,1,float16,float16,0,0.04159466673930486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,0,1,float16,fp8,0,0.07130133112271626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,0,1,fp8,fp8,0,0.06653866668542226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,0,1,float16,float16,0,0.05301333467165629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,128,1,float16,fp8,0,0.04171733558177948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,128,1,fp8,fp8,0,0.03976000100374222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,0,1,float16,fp8,0,0.053904001911481224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,128,1,float16,float16,0,0.04191466669241587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,0,1,fp8,fp8,0,0.05028266708056132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,0,1,float16,float16,0,0.05227733155091604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,128,1,float16,fp8,0,0.041589332123597465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,128,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,0,1,float16,fp8,0,0.05411200225353241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,0,1,fp8,fp8,0,0.05009066561857859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,128,1,float16,float16,0,0.04195199906826019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,0,1,float16,float16,0,0.05195199946562449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,128,1,float16,fp8,0,0.04161600023508072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,128,1,fp8,fp8,0,0.039818666875362396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,0,1,float16,fp8,0,0.05220800141493479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,0,1,fp8,fp8,0,0.05026133358478546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,128,1,float16,float16,0,0.04177066683769226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,0,1,float16,float16,0,0.05221866567929586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,128,1,float16,fp8,0,0.04205866654713949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,128,1,fp8,fp8,0,0.039850667119026184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,0,1,float16,fp8,0,0.051818668842315674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,0,1,fp8,fp8,0,0.050255998969078064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,128,1,float16,float16,0,0.04155733436346054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,0,1,float16,float16,0,0.0528106689453125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,128,1,float16,fp8,0,0.041690667470296226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,128,1,fp8,fp8,0,0.03961066653331121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,0,1,float16,fp8,0,0.05291733145713806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,0,1,fp8,fp8,0,0.0497920016447703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,128,1,float16,float16,0,1.1516266663869221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,128,1,float16,fp8,0,1.1493706703186035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,0,1,float16,float16,0,1.3530294100443523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,128,1,fp8,fp8,0,1.0570613543192546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,0,1,float16,fp8,0,1.3498506546020508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,0,1,fp8,fp8,0,1.2363466421763103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,128,1,float16,float16,0,1.1622453530629475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,0,1,float16,float16,0,1.3659040133158367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,128,1,float16,fp8,0,1.157914638519287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,128,1,fp8,fp8,0,1.1057173411051433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,0,1,float16,fp8,0,1.3597493171691895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,0,1,fp8,fp8,0,1.2943999767303467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,128,1,float16,float16,0,1.1685120264689128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,0,1,float16,float16,0,1.369765281677246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,128,1,float16,fp8,0,1.1662879784901936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,128,1,fp8,fp8,0,1.1418773333231609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,0,1,float16,fp8,0,1.367136001586914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,0,1,fp8,fp8,0,1.3229653040568035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,128,1,float16,float16,0,1.2000693480173747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,128,1,float16,fp8,0,1.1946400006612141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,0,1,float16,float16,0,1.3920693397521973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,128,1,fp8,fp8,0,1.1622506777445476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,128,1,float16,float16,0,0.6363786856333414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,0,1,fp8,fp8,0,1.3431946436564128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,0,1,float16,fp8,0,1.3899733225504558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,0,1,float16,float16,0,0.7446026802062988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,128,1,fp8,fp8,0,0.6148000160853068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,128,1,float16,fp8,0,0.6246773401896158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,0,1,float16,fp8,0,0.7338399887084961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,128,1,float16,float16,0,0.5839786529541016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,0,1,fp8,fp8,0,0.7092639605204264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,0,1,float16,float16,0,0.6890453497568766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,128,1,float16,fp8,0,0.5848906834920248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,128,1,fp8,fp8,0,0.5384320020675659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,0,1,float16,fp8,0,0.6890613238016764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,0,1,fp8,fp8,0,0.6305600007375082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,128,1,float16,float16,0,0.5914453268051147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,0,1,float16,float16,0,0.6927253405253092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,128,1,float16,fp8,0,0.589679996172587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,128,1,fp8,fp8,0,0.5500746568044027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,0,1,float16,fp8,0,0.6916960080464681
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,0,1,fp8,fp8,0,0.6436373392740885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,128,1,float16,float16,0,0.5949813524881998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,0,1,float16,float16,0,0.6986186504364014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,128,1,float16,fp8,0,0.5932053327560425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,128,1,fp8,fp8,0,0.5551946560541788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,0,1,float16,fp8,0,0.6963093280792236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,0,1,fp8,fp8,0,0.6515893141428629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,128,1,float16,float16,0,0.6039413213729858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,128,1,float16,fp8,0,0.600762685139974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,0,1,float16,float16,0,0.7064533233642578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,128,1,fp8,fp8,0,0.5758613348007202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,0,1,float16,fp8,0,0.7048587004343668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,128,1,float16,float16,0,0.33212800820668537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,0,1,float16,float16,0,0.3876426617304484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,128,1,float16,fp8,0,0.3258506655693054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,0,1,fp8,fp8,0,0.6686506271362305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,128,1,fp8,fp8,0,0.31752000252405804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,0,1,float16,fp8,0,0.38147199153900146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,0,1,fp8,fp8,0,0.3696426550547282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,128,1,float16,float16,0,0.30526934067408246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,0,1,float16,float16,0,0.35653332869211835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,128,1,float16,fp8,0,0.3041653235753377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,128,1,fp8,fp8,0,0.27989866336186725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,0,1,float16,fp8,0,0.35767467816670734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,0,1,fp8,fp8,0,0.3285920023918152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,128,1,float16,float16,0,0.3060373266537984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,0,1,float16,float16,0,0.3594826857248942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,128,1,float16,fp8,0,0.3039039969444275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,128,1,fp8,fp8,0,0.28428266445795697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,0,1,float16,fp8,0,0.3583199977874756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,128,1,float16,float16,0,0.30748265981674194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,0,1,fp8,fp8,0,0.3328106602032979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,0,1,float16,float16,0,0.3630773226420085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,128,1,float16,fp8,0,0.3079306681950887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,128,1,fp8,fp8,0,0.28781332572301227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,0,1,float16,fp8,0,0.3606933355331421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,0,1,fp8,fp8,0,0.3378026485443115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,128,1,float16,float16,0,0.313429335753123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,0,1,float16,float16,0,0.36720534165700275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,128,1,float16,fp8,0,0.3118079900741577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,128,1,fp8,fp8,0,0.29580267270406085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,128,1,float16,float16,0,0.17540266116460165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,0,1,float16,fp8,0,0.36540265878041583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,0,1,fp8,fp8,0,0.34565865993499756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,0,1,float16,float16,0,0.20509332418441772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,128,1,float16,fp8,0,0.17359467347462973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,128,1,fp8,fp8,0,0.17086933056513467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,0,1,float16,fp8,0,0.2034133275349935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,0,1,fp8,fp8,0,0.19630932807922363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,128,1,float16,float16,0,0.16099733114242554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,0,1,float16,float16,0,0.18736533323923746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,128,1,float16,fp8,0,0.16057599584261575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,128,1,fp8,fp8,0,0.14899200201034546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,0,1,float16,fp8,0,0.18877865870793661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,0,1,fp8,fp8,0,0.17510932683944702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,128,1,float16,float16,0,0.16168533762296042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,0,1,float16,float16,0,0.19003732999165854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,128,1,float16,fp8,0,0.16105600198109946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,128,1,fp8,fp8,0,0.15243732929229736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,0,1,float16,fp8,0,0.18959999084472656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,0,1,fp8,fp8,0,0.178874671459198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,128,1,float16,float16,0,0.16353066762288412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,0,1,float16,float16,0,0.1914400060971578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,128,1,float16,fp8,0,0.1618666648864746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,128,1,fp8,fp8,0,0.15547200043996176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,0,1,float16,fp8,0,0.19196800390879312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,0,1,fp8,fp8,0,0.1814346710840861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,128,1,float16,float16,0,0.16562666495641074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,0,1,float16,float16,0,0.19409066438674927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,128,1,float16,fp8,0,0.16594133774439493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,128,1,fp8,fp8,0,0.16033599774042764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,0,1,float16,fp8,0,0.1936053236325582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,128,1,float16,float16,0,0.09875733653704326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,0,1,fp8,fp8,0,0.1846933364868164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,0,1,float16,float16,0,0.11614400148391724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,128,1,float16,fp8,0,0.09716799855232239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,128,1,fp8,fp8,0,0.09696533282597859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,0,1,float16,fp8,0,0.11346667011578877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,0,1,fp8,fp8,0,0.11303466558456421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,128,1,float16,float16,0,0.08891200025876363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,0,1,float16,float16,0,0.10745066404342651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,128,1,float16,fp8,0,0.08902399738629659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,128,1,fp8,fp8,0,0.08118933439254761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,0,1,float16,fp8,0,0.10729066530863444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,0,1,fp8,fp8,0,0.0969546635945638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,128,1,float16,float16,0,0.0897226631641388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,0,1,float16,float16,0,0.1058186690012614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,128,1,float16,fp8,0,0.09005332986513774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,128,1,fp8,fp8,0,0.08321066697438557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,0,1,float16,fp8,0,0.10627733667691548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,0,1,fp8,fp8,0,0.09885866443316142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,128,1,float16,float16,0,0.09096533060073853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,0,1,float16,float16,0,0.1062506635983785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,128,1,float16,fp8,0,0.09100266297658284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,128,1,fp8,fp8,0,0.08479467034339905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,0,1,float16,fp8,0,0.10586133599281311
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,0,1,fp8,fp8,0,0.09975999593734741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,128,1,float16,float16,0,0.09128533800443013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,0,1,float16,float16,0,0.10878400007883708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,128,1,float16,fp8,0,0.09124267101287842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,128,1,fp8,fp8,0,0.08660800258318584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,0,1,float16,fp8,0,0.10783466696739197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,128,1,float16,float16,0,0.05553600192070007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,0,1,fp8,fp8,0,0.10130133231480916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,0,1,float16,float16,0,0.06677866478761037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,128,1,float16,fp8,0,0.05592533449331919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,128,1,fp8,fp8,0,0.05429333448410034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,0,1,float16,fp8,0,0.06599466502666473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,0,1,fp8,fp8,0,0.06394133468468984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,128,1,float16,float16,0,0.054048001766204834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,0,1,float16,float16,0,0.06530133386452992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,128,1,float16,fp8,0,0.05403733253479004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,128,1,fp8,fp8,0,0.05201066533724467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,0,1,float16,fp8,0,0.06524266799290974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,0,1,fp8,fp8,0,0.06029866635799408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,128,1,float16,float16,0,0.054325332244237266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,0,1,float16,float16,0,0.06478933493296306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,128,1,float16,fp8,0,0.05416533350944519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,128,1,fp8,fp8,0,0.05201066533724467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,0,1,float16,fp8,0,0.06539733211199443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,0,1,fp8,fp8,0,0.060415998101234436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,128,1,float16,float16,0,0.05421866476535797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,0,1,float16,float16,0,0.06461333235104878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,128,1,float16,fp8,0,0.0551146666208903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,128,1,fp8,fp8,0,0.05179733534653982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,0,1,float16,fp8,0,0.06436799963315327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,0,1,fp8,fp8,0,0.062218666076660156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,128,1,float16,float16,0,0.05407999952634176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,128,1,float16,fp8,0,0.055013333757718406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,0,1,float16,float16,0,0.06595733265082042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,128,1,fp8,fp8,0,0.05227200190226237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,0,1,float16,fp8,0,0.0665226678053538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,0,1,fp8,fp8,0,0.061648001273473106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,128,1,float16,float16,0,0.038373333712418876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,0,1,float16,float16,0,0.04385599990685781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,128,1,float16,fp8,0,0.03781333317359289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,128,1,fp8,fp8,0,0.0363520011305809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,0,1,float16,fp8,0,0.04376000165939331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,0,1,fp8,fp8,0,0.041589332123597465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,128,1,float16,float16,0,0.036544000109036766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,0,1,float16,float16,0,0.043840001026789345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,128,1,float16,fp8,0,0.03618666778008143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,128,1,fp8,fp8,0,0.035386666655540466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,0,1,float16,fp8,0,0.04359466830889384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,0,1,fp8,fp8,0,0.04126933217048645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,128,1,float16,float16,0,0.03719466676314672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,0,1,float16,float16,0,0.044026667873064675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,128,1,float16,fp8,0,0.03754133234421412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,128,1,fp8,fp8,0,0.03551466763019562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,0,1,float16,fp8,0,0.04401599864164988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,0,1,fp8,fp8,0,0.04299733539422353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,128,1,float16,float16,0,0.03809066613515218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,0,1,float16,float16,0,0.04388799766699473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,128,1,float16,fp8,0,0.037461332976818085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,128,1,fp8,fp8,0,0.03579733272393545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,0,1,float16,fp8,0,0.04383466641108195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,0,1,fp8,fp8,0,0.042037333051363625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,128,1,float16,float16,0,0.03772799919048945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,0,1,float16,float16,0,0.045663997530937195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,128,1,float16,fp8,0,0.037503999968369804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,128,1,fp8,fp8,0,0.03770666569471359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,0,1,float16,fp8,0,0.04602666695912679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,0,1,fp8,fp8,0,0.04172799984614054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,128,1,float16,float16,0,0.027845333019892376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,0,1,float16,float16,0,0.03348266581694285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,128,1,float16,fp8,0,0.027402666707833607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,128,1,fp8,fp8,0,0.02754666656255722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,0,1,float16,fp8,0,0.033946665624777474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,0,1,fp8,fp8,0,0.03150933235883713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,0,1,float16,float16,0,0.033589333295822144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,128,1,float16,float16,0,0.02762666592995326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,128,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,128,1,fp8,fp8,0,0.026714667677879333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,0,1,float16,fp8,0,0.03350933392842611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,0,1,fp8,fp8,0,0.031194667021433514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,128,1,float16,float16,0,0.02739733209212621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,0,1,float16,float16,0,0.03335466732581457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,128,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,128,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,0,1,float16,fp8,0,0.03162666658560435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,0,1,fp8,fp8,0,0.03173333406448364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,128,1,float16,float16,0,0.027258666853109997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,0,1,float16,float16,0,0.032645332316557564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,128,1,float16,fp8,0,0.027808000644048054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,128,1,fp8,fp8,0,0.02731200059254964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,0,1,float16,fp8,0,0.033386667569478355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,0,1,fp8,fp8,0,0.03159466634194056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,128,1,float16,float16,0,0.027637332677841187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,0,1,float16,float16,0,0.033471999069054924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,128,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,128,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,0,1,float16,fp8,0,0.03363200028737386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,0,1,fp8,fp8,0,0.03162133445342382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,128,1,float16,float16,0,1.1193386713663738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,0,1,float16,float16,0,1.131376028060913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,128,1,float16,fp8,0,1.117205301920573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,128,1,fp8,fp8,0,1.0297333399454753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,0,1,float16,fp8,0,1.1296532948811848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,0,1,fp8,fp8,0,1.0386719703674316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,128,1,float16,float16,0,1.1308266321818035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,0,1,float16,float16,0,1.1467680136362712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,128,1,float16,fp8,0,1.1271146933237712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,128,1,fp8,fp8,0,1.0671947002410889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,0,1,fp8,fp8,0,1.0808320045471191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,0,1,float16,fp8,0,1.1399892965952556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,128,1,float16,float16,0,1.137178659439087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,0,1,float16,float16,0,1.1523093382517497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,128,1,float16,fp8,0,1.133519967397054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,128,1,fp8,fp8,0,1.1098079681396484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,0,1,fp8,fp8,0,1.1199519634246826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,0,1,float16,fp8,0,1.1522880395253499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,128,1,float16,float16,0,1.1611253420511882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,0,1,float16,float16,0,1.1812053521474202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,128,1,float16,fp8,0,1.1651573181152344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,0,1,fp8,fp8,0,1.1491519610087078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,0,1,float16,fp8,0,1.1795893510182698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,128,1,fp8,fp8,0,1.132863998413086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,128,1,float16,float16,0,0.6205493211746216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,128,1,float16,fp8,0,0.6096266508102417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,0,1,float16,float16,0,0.6327413320541382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,128,1,fp8,fp8,0,0.6017386515935262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,0,1,float16,fp8,0,0.6205600102742513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,0,1,fp8,fp8,0,0.6075626611709595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,128,1,float16,float16,0,0.5681013266245524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,0,1,float16,float16,0,0.5769226551055908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,128,1,float16,fp8,0,0.569536010424296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,128,1,fp8,fp8,0,0.5229706764221191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,0,1,float16,fp8,0,0.5769493182500204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,0,1,fp8,fp8,0,0.5293226639429728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,128,1,float16,float16,0,0.5740906794865926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,0,1,float16,float16,0,0.5822186470031738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,128,1,float16,fp8,0,0.5721439917882284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,128,1,fp8,fp8,0,0.5362773338953654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,0,1,float16,fp8,0,0.5805013179779053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,0,1,fp8,fp8,0,0.5411466757456461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,128,1,float16,float16,0,0.5789013306299845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,0,1,float16,float16,0,0.5877866744995117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,128,1,float16,fp8,0,0.577018658320109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,128,1,fp8,fp8,0,0.5427999893824259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,0,1,float16,fp8,0,0.5846560001373291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,0,1,fp8,fp8,0,0.5479626655578613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,128,1,float16,float16,0,0.5891679922739664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,0,1,float16,float16,0,0.5948959986368815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,128,1,float16,fp8,0,0.585968017578125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,128,1,fp8,fp8,0,0.560965339342753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,128,1,float16,float16,0,0.3232373396555583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,0,1,float16,fp8,0,0.5951040188471476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,0,1,float16,float16,0,0.3293760021527608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,0,1,fp8,fp8,0,0.5636639992396036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,128,1,float16,fp8,0,0.31717334191004437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,128,1,fp8,fp8,0,0.30982933441797894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,0,1,float16,fp8,0,0.3231253425280253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,0,1,fp8,fp8,0,0.31362666686375934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,128,1,float16,float16,0,0.29691733916600543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,0,1,float16,float16,0,0.2985386649767558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,128,1,float16,fp8,0,0.29632000128428143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,128,1,fp8,fp8,0,0.27160000801086426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,0,1,float16,fp8,0,0.30001600583394367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,0,1,fp8,fp8,0,0.27371732393900555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,128,1,float16,float16,0,0.29626667499542236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,0,1,float16,float16,0,0.3009333411852519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,128,1,float16,fp8,0,0.29580267270406085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,128,1,fp8,fp8,0,0.27686933676401776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,0,1,fp8,fp8,0,0.28060799837112427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,0,1,float16,fp8,0,0.2997013330459595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,128,1,float16,float16,0,0.29869333902994794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,0,1,float16,float16,0,0.3040800094604492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,128,1,fp8,fp8,0,0.2815679907798767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,128,1,float16,fp8,0,0.29916266600290936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,0,1,float16,fp8,0,0.30269332726796466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,0,1,fp8,fp8,0,0.28468799591064453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,128,1,float16,float16,0,0.3057226737340291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,0,1,float16,float16,0,0.3102560043334961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,128,1,float16,fp8,0,0.30421332518259686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,128,1,fp8,fp8,0,0.2896266579627991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,0,1,fp8,fp8,0,0.2930399974187215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,0,1,float16,fp8,0,0.30797866980234784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,128,1,float16,float16,0,0.1714400053024292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,0,1,float16,float16,0,0.17390932639439902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,128,1,float16,fp8,0,0.16958399613698324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,128,1,fp8,fp8,0,0.16684265931447348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,0,1,float16,fp8,0,0.17230933904647827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,0,1,fp8,fp8,0,0.16760534048080444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,128,1,float16,float16,0,0.15692266821861267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,0,1,float16,float16,0,0.15837333599726358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,128,1,float16,fp8,0,0.15523200233777365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,128,1,fp8,fp8,0,0.14592533310254416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,0,1,float16,fp8,0,0.15889599919319153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,0,1,fp8,fp8,0,0.1477226714293162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,128,1,float16,float16,0,0.156741331020991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,0,1,float16,float16,0,0.15924266974131265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,128,1,float16,fp8,0,0.1574720044930776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,128,1,fp8,fp8,0,0.14870400230089822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,0,1,fp8,fp8,0,0.14875200390815735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,0,1,float16,fp8,0,0.15985066692034403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,128,1,float16,float16,0,0.15943466623624167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,128,1,float16,fp8,0,0.1593173344930013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,0,1,float16,float16,0,0.16084800163904825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,128,1,fp8,fp8,0,0.15024532874425253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,0,1,float16,fp8,0,0.1607253352801005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,0,1,fp8,fp8,0,0.152319997549057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,128,1,float16,float16,0,0.16144532958666483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,0,1,float16,float16,0,0.16305599610010782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,128,1,float16,fp8,0,0.1606613298257192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,128,1,fp8,fp8,0,0.15609066685040793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,0,1,float16,fp8,0,0.1630773345629374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,0,1,fp8,fp8,0,0.15680533647537231
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,128,1,float16,float16,0,0.09532800316810608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,0,1,float16,float16,0,0.0974079966545105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,128,1,float16,fp8,0,0.09521599610646565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,128,1,fp8,fp8,0,0.09680533409118652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,0,1,float16,fp8,0,0.09689600268999736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,0,1,fp8,fp8,0,0.09554133812586467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,128,1,float16,float16,0,0.08885866403579712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,128,1,float16,fp8,0,0.08692266543706258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,0,1,float16,float16,0,0.08894933263460796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,128,1,fp8,fp8,0,0.0806826651096344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,0,1,float16,fp8,0,0.08801066875457764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,0,1,fp8,fp8,0,0.08064533273379008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,128,1,float16,float16,0,0.08889066179593404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,0,1,float16,float16,0,0.08803733189900716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,128,1,float16,fp8,0,0.08730133374532063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,128,1,fp8,fp8,0,0.08101866642634074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,0,1,fp8,fp8,0,0.08267199993133545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,0,1,float16,fp8,0,0.08901866277058919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,128,1,float16,float16,0,0.08694932858149211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,0,1,float16,float16,0,0.08939199646313985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,128,1,float16,fp8,0,0.08868799606959026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,128,1,fp8,fp8,0,0.08167999982833862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,0,1,float16,fp8,0,0.08958933750788371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,0,1,fp8,fp8,0,0.08283733328183492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,128,1,float16,float16,0,0.088319996992747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,0,1,float16,float16,0,0.09053867061932881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,128,1,float16,fp8,0,0.08901866277058919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,128,1,fp8,fp8,0,0.08547199765841167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,0,1,float16,fp8,0,0.08965866764386494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,0,1,fp8,fp8,0,0.0848586658636729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,128,1,float16,float16,0,0.05608533322811127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,0,1,float16,float16,0,0.056287998954455055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,128,1,float16,fp8,0,0.05450133482615153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,128,1,fp8,fp8,0,0.053082664807637535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,0,1,float16,fp8,0,0.05624000231424967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,0,1,fp8,fp8,0,0.05406933526198069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,128,1,float16,float16,0,0.05241066714127859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,0,1,float16,float16,0,0.05407466491063436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,128,1,float16,fp8,0,0.05233600238958994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,128,1,fp8,fp8,0,0.049973333875338234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,0,1,float16,fp8,0,0.05380799869696299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,0,1,fp8,fp8,0,0.05108266572157542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,0,1,float16,float16,0,0.054885332783063255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,128,1,float16,float16,0,0.05403199791908264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,128,1,float16,fp8,0,0.051856001218159996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,128,1,fp8,fp8,0,0.050442665815353394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,0,1,float16,fp8,0,0.05401599903901418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,0,1,fp8,fp8,0,0.0499893327554067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,0,1,float16,float16,0,0.05444799860318502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,128,1,float16,float16,0,0.05357866485913595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,128,1,float16,fp8,0,0.05397866666316986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,128,1,fp8,fp8,0,0.04978133241335551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,0,1,float16,fp8,0,0.054714664816856384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,0,1,fp8,fp8,0,0.05085866649945577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,128,1,float16,float16,0,0.05397333204746246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,0,1,float16,float16,0,0.05448000133037567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,128,1,float16,fp8,0,0.05399466554323832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,128,1,fp8,fp8,0,0.05179733534653982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,0,1,float16,fp8,0,0.054474666714668274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,0,1,fp8,fp8,0,0.05228800078233083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,128,1,float16,float16,0,0.037402667105197906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,0,1,float16,float16,0,0.03779733429352442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,128,1,float16,fp8,0,0.03751466671625773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,128,1,fp8,fp8,0,0.03688533355792364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,0,1,float16,fp8,0,0.03777066618204117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,0,1,fp8,fp8,0,0.03555200000603994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,128,1,float16,float16,0,0.03563733398914337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,0,1,float16,float16,0,0.03774933268626531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,128,1,float16,fp8,0,0.03769599894682566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,128,1,fp8,fp8,0,0.033743999898433685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,0,1,float16,fp8,0,0.037621334195137024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,0,1,fp8,fp8,0,0.03350399931271871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,128,1,float16,float16,0,0.036144000788529716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,0,1,float16,float16,0,0.03728533287843069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,128,1,float16,fp8,0,0.03576533248027166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,128,1,fp8,fp8,0,0.03364266703526179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,0,1,float16,fp8,0,0.03755199909210205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,0,1,fp8,fp8,0,0.03387200087308884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,128,1,float16,float16,0,0.03749333322048187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,0,1,float16,float16,0,0.03779733429352442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,128,1,float16,fp8,0,0.037503999968369804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,128,1,fp8,fp8,0,0.035802667339642845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,0,1,float16,fp8,0,0.037418665985266365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,0,1,fp8,fp8,0,0.03576533248027166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,128,1,float16,float16,0,0.03770666569471359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,0,1,float16,float16,0,0.03746666759252548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,128,1,float16,fp8,0,0.0376800000667572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,128,1,fp8,fp8,0,0.03552533437808355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,0,1,float16,fp8,0,0.0378560001651446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,0,1,fp8,fp8,0,0.0356480007370313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,128,1,float16,float16,0,0.027290667096773785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,0,1,float16,float16,0,0.029520000020662945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,128,1,float16,fp8,0,0.02762666592995326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,128,1,fp8,fp8,0,0.02757866680622101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,0,1,float16,fp8,0,0.027653334041436512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,0,1,fp8,fp8,0,0.0272533322374026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,128,1,float16,float16,0,0.0273333340883255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,0,1,float16,float16,0,0.027248000105222065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,128,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,128,1,fp8,fp8,0,0.025850666066010792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,0,1,float16,fp8,0,0.02735466758410136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,0,1,fp8,fp8,0,0.02550400048494339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,128,1,float16,float16,0,0.027493332823117573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,0,1,float16,float16,0,0.027482666075229645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,128,1,float16,fp8,0,0.027269333600997925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,128,1,fp8,fp8,0,0.02703999976317088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,0,1,float16,fp8,0,0.027589333554108936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,0,1,fp8,fp8,0,0.027434666951497395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,128,1,float16,float16,0,0.027258666853109997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,128,1,float16,fp8,0,0.027221334477265675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,0,1,float16,float16,0,0.027727998793125153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,128,1,fp8,fp8,0,0.02569066733121872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,0,1,float16,fp8,0,0.027530667682488758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,0,1,fp8,fp8,0,0.027530667682488758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,128,1,float16,float16,0,0.027503999571005504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,0,1,float16,float16,0,0.02899733434120814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,128,1,float16,fp8,0,0.02771199991305669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,128,1,fp8,fp8,0,0.027349332968393963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,0,1,float16,fp8,0,0.02951466788848241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,0,1,fp8,fp8,0,0.027082666754722595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,0,1,float16,float16,0,0.025098666548728943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,128,1,float16,float16,0,0.023562667270501454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,128,1,float16,fp8,0,0.02332799881696701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,128,1,fp8,fp8,0,0.02346133440732956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,0,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,0,1,fp8,fp8,0,0.022965334355831146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,128,1,float16,float16,0,0.023178666830062866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,0,1,float16,float16,0,0.023354666928450268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,128,1,float16,fp8,0,0.023061332603295643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,128,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,0,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,0,1,fp8,fp8,0,0.02316266546646754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,128,1,float16,float16,0,0.023077333966890972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,128,1,float16,fp8,0,0.02348266790310542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,0,1,float16,float16,0,0.023007998863856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,128,1,fp8,fp8,0,0.023018665611743927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,0,1,float16,fp8,0,0.023418667415777843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,0,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,128,1,float16,float16,0,0.023472001155217487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,0,1,float16,float16,0,0.02311466634273529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,128,1,fp8,fp8,0,0.02142400046189626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,0,1,float16,fp8,0,0.025018667181332905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,0,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,128,1,float16,float16,0,0.0233599990606308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,0,1,float16,float16,0,0.02441066751877467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,128,1,float16,fp8,0,0.023445333043734234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,0,1,float16,fp8,0,0.025199999411900837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,0,1,fp8,fp8,0,0.023423999547958374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,128,1,float16,float16,0,0.5254186789194742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,0,1,float16,float16,0,0.5174826780954996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,128,1,float16,fp8,0,0.5249866644541422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,128,1,fp8,fp8,0,0.47884265581766766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,0,1,float16,fp8,0,0.514304002126058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,0,1,fp8,fp8,0,0.46528534094492596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,128,1,float16,float16,0,0.5322240193684896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,0,1,float16,float16,0,0.5214399894078573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,128,1,float16,fp8,0,0.5292640129725138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,128,1,fp8,fp8,0,0.4946986834208171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,0,1,float16,fp8,0,0.5174933274586996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,0,1,fp8,fp8,0,0.4814560015996297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,128,1,float16,float16,0,0.5356746514638265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,0,1,float16,float16,0,0.5242986679077148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,128,1,float16,fp8,0,0.5322666565577189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,128,1,fp8,fp8,0,0.49933866659800213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,0,1,float16,fp8,0,0.5199040174484253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,0,1,fp8,fp8,0,0.48795199394226074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,128,1,float16,float16,0,0.5431306759516398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,0,1,float16,float16,0,0.5327733357747396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,128,1,float16,fp8,0,0.5373599926630656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,128,1,fp8,fp8,0,0.5263786713282267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,0,1,float16,fp8,0,0.5283039808273315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,0,1,fp8,fp8,0,0.5164213180541992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,0,1,float16,float16,0,0.29688533147176105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,128,1,float16,float16,0,0.3021226723988851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,128,1,float16,fp8,0,0.2955893278121948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,128,1,fp8,fp8,0,0.2888053258260091
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,0,1,float16,fp8,0,0.29022934039433795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,0,1,fp8,fp8,0,0.28141866127649945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,128,1,float16,float16,0,0.2744693358739217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,0,1,float16,float16,0,0.26758400599161786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,128,1,float16,fp8,0,0.27538132667541504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,128,1,fp8,fp8,0,0.24779733022054037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,0,1,float16,fp8,0,0.2669706741968791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,0,1,fp8,fp8,0,0.2437546650568644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,128,1,float16,float16,0,0.2752106587092082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,0,1,float16,float16,0,0.2698400020599365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,128,1,float16,fp8,0,0.2738773425420125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,128,1,fp8,fp8,0,0.25760533412297565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,0,1,float16,fp8,0,0.26820266246795654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,0,1,fp8,fp8,0,0.2499199906984965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,128,1,float16,float16,0,0.2760746677716573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,0,1,float16,float16,0,0.2709706624348958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,128,1,float16,fp8,0,0.27480532725652057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,128,1,fp8,fp8,0,0.25786133607228595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,0,1,float16,fp8,0,0.27004265785217285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,0,1,fp8,fp8,0,0.2521493236223857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,128,1,float16,float16,0,0.2821120023727417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,0,1,float16,float16,0,0.27772800127665204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,128,1,float16,fp8,0,0.2802773316701253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,128,1,fp8,fp8,0,0.26660799980163574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,0,1,float16,fp8,0,0.2760746677716573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,0,1,fp8,fp8,0,0.2627093394597371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,0,1,float16,float16,0,0.15894933541615805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,128,1,float16,float16,0,0.1613653302192688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,128,1,float16,fp8,0,0.15812266866366068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,128,1,fp8,fp8,0,0.15661333004633585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,0,1,float16,fp8,0,0.15575466553370157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,128,1,float16,float16,0,0.14670399824778238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,0,1,fp8,fp8,0,0.1525866687297821
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,0,1,float16,float16,0,0.14350933829943338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,128,1,float16,fp8,0,0.14787733554840088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,128,1,fp8,fp8,0,0.13609600067138672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,0,1,float16,fp8,0,0.14375999569892883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,0,1,fp8,fp8,0,0.1301866670449575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,128,1,float16,float16,0,0.14857600132624307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,0,1,float16,float16,0,0.14444266756375632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,128,1,float16,fp8,0,0.146506667137146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,128,1,fp8,fp8,0,0.13793599605560303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,0,1,float16,fp8,0,0.14428266882896423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,0,1,fp8,fp8,0,0.133925328652064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,128,1,float16,float16,0,0.14845866958300272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,0,1,float16,float16,0,0.1453973352909088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,128,1,float16,fp8,0,0.1483519971370697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,128,1,fp8,fp8,0,0.1397386689980825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,0,1,float16,fp8,0,0.14531733592351279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,0,1,fp8,fp8,0,0.1372160017490387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,128,1,float16,float16,0,0.15081066886583963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,0,1,float16,float16,0,0.14684800306955972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,128,1,float16,fp8,0,0.150325338045756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,128,1,fp8,fp8,0,0.14428800344467163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,128,1,float16,float16,0,0.09118400017420451
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,0,1,float16,fp8,0,0.14754666884740195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,0,1,fp8,fp8,0,0.1400373379389445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,0,1,float16,float16,0,0.08855467041333516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,128,1,float16,fp8,0,0.09111467003822327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,128,1,fp8,fp8,0,0.0906933347384135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,0,1,float16,fp8,0,0.08901866277058919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,0,1,fp8,fp8,0,0.08894399801890056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,128,1,float16,float16,0,0.08252266546090443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,0,1,float16,float16,0,0.08238400022188823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,128,1,float16,fp8,0,0.08389866352081299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,128,1,fp8,fp8,0,0.07626666625340779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,0,1,float16,fp8,0,0.08073066671689351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,0,1,fp8,fp8,0,0.07393066585063934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,128,1,float16,float16,0,0.08379733562469482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,0,1,float16,float16,0,0.08118399977684021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,128,1,float16,fp8,0,0.08261333405971527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,128,1,fp8,fp8,0,0.07658666869004567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,0,1,float16,fp8,0,0.08282133440176646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,0,1,fp8,fp8,0,0.07522666454315186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,128,1,float16,float16,0,0.08329600095748901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,0,1,float16,float16,0,0.08264533181985219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,128,1,float16,fp8,0,0.08262399832407634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,128,1,fp8,fp8,0,0.07784000039100647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,0,1,float16,fp8,0,0.08241599798202515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,0,1,fp8,fp8,0,0.07630933324495952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,128,1,float16,float16,0,0.0846613347530365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,0,1,float16,float16,0,0.08402132987976074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,128,1,float16,fp8,0,0.08473599950472514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,128,1,fp8,fp8,0,0.08086933195590973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,0,1,float16,fp8,0,0.08193600177764893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,0,1,fp8,fp8,0,0.07891199986139934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,128,1,float16,float16,0,0.05189866820971171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,0,1,float16,float16,0,0.05198933184146881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,128,1,float16,fp8,0,0.0517546683549881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,128,1,fp8,fp8,0,0.04969066878159841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,0,1,float16,fp8,0,0.05202133456865946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,128,1,float16,float16,0,0.05019199848175049
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,0,1,fp8,fp8,0,0.04822933177153269
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,0,1,float16,float16,0,0.048010667165120445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,128,1,float16,fp8,0,0.05004266897837321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,128,1,fp8,fp8,0,0.04614933331807455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,0,1,float16,fp8,0,0.048384000857671104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,0,1,fp8,fp8,0,0.04563199977080027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,128,1,float16,float16,0,0.05017066498597463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,0,1,float16,float16,0,0.04979733129342397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,128,1,float16,fp8,0,0.04982399940490723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,128,1,fp8,fp8,0,0.047637333472569786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,0,1,float16,fp8,0,0.04987200101216634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,0,1,fp8,fp8,0,0.045935998360315956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,128,1,float16,float16,0,0.050250664353370667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,0,1,float16,float16,0,0.04794666667779287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,128,1,float16,fp8,0,0.04993600149949392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,128,1,fp8,fp8,0,0.046207999189694725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,0,1,float16,fp8,0,0.048751999934514366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,0,1,fp8,fp8,0,0.046522667010625206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,128,1,float16,float16,0,0.05003199974695841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,0,1,float16,float16,0,0.049365331729253135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,128,1,float16,fp8,0,0.0513973335425059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,128,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,0,1,float16,fp8,0,0.04877333343029022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,0,1,fp8,fp8,0,0.04595200220743815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,128,1,float16,float16,0,0.03587199995915095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,0,1,float16,float16,0,0.035616000493367515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,128,1,float16,fp8,0,0.035461333890755974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,128,1,fp8,fp8,0,0.035418666899204254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,0,1,float16,fp8,0,0.03543466577927271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,0,1,fp8,fp8,0,0.03339199970165888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,128,1,float16,float16,0,0.033610666791598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,0,1,float16,float16,0,0.03348266581694285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,128,1,float16,fp8,0,0.03517866631348928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,128,1,fp8,fp8,0,0.031557333966096245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,0,1,float16,fp8,0,0.03350933392842611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,0,1,fp8,fp8,0,0.0322080006202062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,128,1,float16,float16,0,0.03541333228349686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,0,1,float16,float16,0,0.03350399931271871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,128,1,float16,fp8,0,0.03583466758330663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,128,1,fp8,fp8,0,0.03382399926582972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,0,1,float16,fp8,0,0.033813332517941795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,0,1,fp8,fp8,0,0.03486400097608566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,0,1,float16,float16,0,0.03346133232116699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,128,1,float16,float16,0,0.03566933423280716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,128,1,float16,fp8,0,0.03583466758330663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,128,1,fp8,fp8,0,0.03366400053103765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,0,1,float16,fp8,0,0.0356480007370313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,0,1,fp8,fp8,0,0.032186667124430336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,128,1,float16,float16,0,0.035717333356539406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,0,1,float16,float16,0,0.033615998923778534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,128,1,float16,fp8,0,0.035429333647092186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,128,1,fp8,fp8,0,0.03364266703526179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,0,1,float16,fp8,0,0.0340693344672521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,0,1,fp8,fp8,0,0.033743999898433685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,128,1,float16,float16,0,0.02516266703605652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,0,1,float16,float16,0,0.025381334125995636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,128,1,float16,fp8,0,0.02759466568628947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,128,1,fp8,fp8,0,0.025450666745503742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,0,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,0,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,128,1,float16,float16,0,0.025392000873883564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,0,1,float16,float16,0,0.02550933261712392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,128,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,128,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,0,1,float16,fp8,0,0.02533866713444392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,0,1,fp8,fp8,0,0.023578666150569916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,128,1,float16,float16,0,0.025546667476495106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,0,1,float16,float16,0,0.025562666356563568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,128,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,128,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,0,1,float16,fp8,0,0.02573866645495097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,0,1,fp8,fp8,0,0.025285333395004272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,128,1,float16,float16,0,0.025279998779296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,128,1,float16,fp8,0,0.02532800038655599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,128,1,fp8,fp8,0,0.024959998826185863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,0,1,float16,fp8,0,0.025008000433444977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,0,1,fp8,fp8,0,0.02327999969323476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,128,1,float16,float16,0,0.025386666258176167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,0,1,float16,float16,0,0.025087999800841015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,128,1,float16,fp8,0,0.025706666211287182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,128,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,0,1,float16,fp8,0,0.02604266752799352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,0,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,128,1,float16,float16,0,0.021082667013009388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,0,1,float16,float16,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,128,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,128,1,fp8,fp8,0,0.020165332903464634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,0,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,128,1,float16,float16,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,0,1,float16,float16,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,128,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,128,1,fp8,fp8,0,0.019573333362738293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,0,1,fp8,fp8,0,0.019509332875410717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,128,1,float16,float16,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,0,1,float16,float16,0,0.02117866774400075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,128,1,float16,fp8,0,0.02178666740655899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,128,1,fp8,fp8,0,0.021429332594076794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,0,1,float16,fp8,0,0.021407999098300934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,128,1,float16,float16,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,0,1,float16,float16,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,128,1,float16,fp8,0,0.02162133405605952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,128,1,fp8,fp8,0,0.021013334393501282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,0,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,128,1,float16,float16,0,0.02144533395767212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,0,1,float16,fp8,0,0.02160533269246419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,0,1,float16,float16,0,0.02128000060717265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,128,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,128,1,fp8,fp8,0,0.021418665846188862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,0,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,128,1,float16,float16,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,0,1,float16,float16,0,0.01940800001223882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,128,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,128,1,fp8,fp8,0,0.01969066634774208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,0,1,float16,fp8,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,128,1,float16,float16,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,0,1,float16,float16,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,128,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,128,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,0,1,float16,fp8,0,0.01978133370478948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,128,1,float16,float16,0,0.020848001043001812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,128,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,0,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,128,1,fp8,fp8,0,0.019637333850065868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,128,1,float16,float16,0,0.021029333273569744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,128,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,0,1,float16,float16,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,128,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,0,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,0,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,128,1,float16,float16,0,0.020703999946514767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,0,1,float16,float16,0,0.02128533273935318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,128,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,128,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,0,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,0,1,float16,fp8,0,0.02089600016673406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,128,1,float16,float16,0,0.2900693416595459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,128,1,float16,fp8,0,0.28774933020273846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,0,1,float16,float16,0,0.2890346646308899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,128,1,fp8,fp8,0,0.265557328859965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,0,1,fp8,fp8,0,0.2657279968261719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,0,1,float16,fp8,0,0.28759467601776123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,128,1,float16,float16,0,0.2890666723251343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,128,1,float16,fp8,0,0.28891199827194214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,0,1,float16,float16,0,0.2898826599121094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,128,1,fp8,fp8,0,0.27473066250483197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,0,1,float16,fp8,0,0.28992533683776855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,0,1,fp8,fp8,0,0.2760106722513835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,128,1,float16,float16,0,0.29077333211898804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,0,1,float16,float16,0,0.29447466135025024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,128,1,float16,fp8,0,0.29074132442474365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,128,1,fp8,fp8,0,0.27638399600982666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,0,1,float16,fp8,0,0.2913973331451416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,0,1,fp8,fp8,0,0.2765386700630188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,128,1,float16,float16,0,0.29518399635950726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,0,1,float16,float16,0,0.2971466581026713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,128,1,float16,fp8,0,0.29441599051157635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,128,1,fp8,fp8,0,0.2878719965616862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,128,1,float16,float16,0,0.1669279932975769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,0,1,float16,fp8,0,0.2940533359845479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,0,1,fp8,fp8,0,0.2858826716740926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,0,1,float16,float16,0,0.16810667514801025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,128,1,float16,fp8,0,0.16514133413632712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,128,1,fp8,fp8,0,0.16405333081881204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,0,1,float16,fp8,0,0.16361066699028015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,128,1,float16,float16,0,0.15412799517313638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,0,1,fp8,fp8,0,0.16356266538302103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,0,1,float16,float16,0,0.15380799770355225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,128,1,float16,fp8,0,0.15425599614779154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,128,1,fp8,fp8,0,0.14220266540845236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,0,1,fp8,fp8,0,0.14200533429781595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,0,1,float16,fp8,0,0.15316800276438394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,128,1,float16,float16,0,0.15382933616638184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,0,1,float16,float16,0,0.15315733353296915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,128,1,float16,fp8,0,0.15280000368754068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,128,1,fp8,fp8,0,0.14457600315411887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,0,1,float16,fp8,0,0.15238400300343832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,0,1,fp8,fp8,0,0.14414933323860168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,128,1,float16,float16,0,0.154639999071757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,0,1,float16,float16,0,0.15446399648984274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,128,1,float16,fp8,0,0.15413866440455118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,128,1,fp8,fp8,0,0.14808000127474466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,0,1,float16,fp8,0,0.154448002576828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,0,1,fp8,fp8,0,0.14819199840227762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,128,1,float16,float16,0,0.15666133165359497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,0,1,float16,float16,0,0.15704533457756042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,128,1,float16,fp8,0,0.15658666690190634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,0,1,float16,fp8,0,0.1554080049196879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,128,1,fp8,fp8,0,0.15316800276438394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,128,1,float16,float16,0,0.09299199779828389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,0,1,fp8,fp8,0,0.15315733353296915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,0,1,float16,float16,0,0.09357866644859314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,128,1,float16,fp8,0,0.09202667077382405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,128,1,fp8,fp8,0,0.09283733367919922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,0,1,float16,fp8,0,0.09265066186587016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,0,1,fp8,fp8,0,0.09333866834640503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,128,1,float16,float16,0,0.08520533641179402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,0,1,float16,float16,0,0.08685866991678874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,128,1,float16,fp8,0,0.08547199765841167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,128,1,fp8,fp8,0,0.07878933350245158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,0,1,float16,fp8,0,0.0865226686000824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,0,1,fp8,fp8,0,0.07850666840871175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,128,1,float16,float16,0,0.08584533135096233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,0,1,float16,float16,0,0.08586666981379192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,128,1,float16,fp8,0,0.08507200082143147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,128,1,fp8,fp8,0,0.08019199967384338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,0,1,float16,fp8,0,0.08546666304270427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,0,1,fp8,fp8,0,0.07858133316040039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,128,1,float16,float16,0,0.08613866567611694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,0,1,float16,float16,0,0.08561066786448161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,128,1,float16,fp8,0,0.08705066641171773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,128,1,fp8,fp8,0,0.07852266728878021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,0,1,float16,fp8,0,0.08519466718037923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,0,1,fp8,fp8,0,0.08016000191370647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,128,1,float16,float16,0,0.08701866865158081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,0,1,float16,float16,0,0.08846933643023173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,128,1,float16,fp8,0,0.08507200082143147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,128,1,fp8,fp8,0,0.0831573357184728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,0,1,float16,fp8,0,0.08690667152404785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,0,1,fp8,fp8,0,0.08258666594823201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,128,1,float16,float16,0,0.053914666175842285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,0,1,float16,float16,0,0.052895997961362205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,128,1,float16,fp8,0,0.05395199855168661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,128,1,fp8,fp8,0,0.05193600058555603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,0,1,float16,fp8,0,0.052255998055140175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,0,1,fp8,fp8,0,0.053360000252723694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,128,1,float16,float16,0,0.05134400228659312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,0,1,float16,float16,0,0.05229333539803823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,128,1,float16,fp8,0,0.051829333106676735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,128,1,fp8,fp8,0,0.04789866507053375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,0,1,float16,fp8,0,0.05209066470464071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,0,1,fp8,fp8,0,0.048384000857671104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,128,1,float16,float16,0,0.05179733534653982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,0,1,float16,float16,0,0.052095999320348106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,128,1,float16,fp8,0,0.05113600194454193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,128,1,fp8,fp8,0,0.04804799954096476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,0,1,float16,fp8,0,0.05218133330345154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,0,1,fp8,fp8,0,0.04799999793370565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,128,1,float16,float16,0,0.05190933247407278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,0,1,float16,float16,0,0.05162133276462555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,128,1,float16,fp8,0,0.05157866577307383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,128,1,fp8,fp8,0,0.049039999643961586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,0,1,float16,fp8,0,0.05150933563709259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,0,1,fp8,fp8,0,0.048010667165120445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,128,1,float16,float16,0,0.05202133456865946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,0,1,float16,float16,0,0.052101333936055504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,128,1,float16,fp8,0,0.052202666799227394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,128,1,fp8,fp8,0,0.04971733192602793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,0,1,float16,fp8,0,0.05148266752560934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,0,1,fp8,fp8,0,0.0499839981396993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,128,1,float16,float16,0,0.033674667278925575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,0,1,float16,float16,0,0.03358400116364161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,128,1,float16,fp8,0,0.03349333256483078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,128,1,fp8,fp8,0,0.03296533226966858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,0,1,float16,fp8,0,0.03408000121514002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,0,1,fp8,fp8,0,0.03344533344109853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,128,1,float16,float16,0,0.03332799921433131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,0,1,float16,float16,0,0.03340800106525421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,128,1,float16,fp8,0,0.032255999743938446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,128,1,fp8,fp8,0,0.031557333966096245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,0,1,float16,fp8,0,0.03355200091997782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,0,1,fp8,fp8,0,0.03146133323510488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,128,1,float16,float16,0,0.033039999504884086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,0,1,float16,float16,0,0.03195200115442276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,128,1,float16,fp8,0,0.03331200033426285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,128,1,fp8,fp8,0,0.031850665807724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,0,1,float16,fp8,0,0.03252266595760981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,0,1,fp8,fp8,0,0.03182400017976761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,128,1,float16,float16,0,0.03329599897066752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,0,1,float16,float16,0,0.033701332906881966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,128,1,float16,fp8,0,0.033520000676314034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,128,1,fp8,fp8,0,0.03163733333349228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,0,1,float16,fp8,0,0.033610666791598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,0,1,fp8,fp8,0,0.0316746657093366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,128,1,float16,float16,0,0.03389333436886469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,0,1,float16,float16,0,0.03363733241955439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,128,1,float16,fp8,0,0.03339733431736628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,128,1,fp8,fp8,0,0.03369066615899404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,0,1,float16,fp8,0,0.033546666304270424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,0,1,fp8,fp8,0,0.0322773332397143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,128,1,float16,float16,0,0.026447998980681103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,0,1,float16,float16,0,0.02734400083621343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,128,1,float16,fp8,0,0.026613332331180573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,0,1,float16,fp8,0,0.02756800005833308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,0,1,fp8,fp8,0,0.027072000006834667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,128,1,fp8,fp8,0,0.02571200082699458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,128,1,float16,float16,0,0.025290665527184803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,0,1,float16,float16,0,0.026560001075267792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,128,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,0,1,float16,fp8,0,0.025514667232831318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,128,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,0,1,fp8,fp8,0,0.025231999655564625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,128,1,float16,float16,0,0.02550933261712392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,0,1,float16,float16,0,0.026208000878492992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,128,1,float16,fp8,0,0.026677332818508148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,128,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,0,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,0,1,float16,fp8,0,0.02625600000222524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,128,1,float16,float16,0,0.02532800038655599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,0,1,float16,float16,0,0.02515200028816859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,128,1,fp8,fp8,0,0.02565866708755493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,128,1,float16,fp8,0,0.02555199960867564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,0,1,float16,fp8,0,0.02531733363866806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,0,1,fp8,fp8,0,0.02555199960867564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,128,1,float16,float16,0,0.025754667818546295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,0,1,float16,float16,0,0.025557334224383037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,128,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,128,1,fp8,fp8,0,0.02550933261712392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,0,1,float16,fp8,0,0.0271573339899381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,0,1,fp8,fp8,0,0.025631998976071674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,128,1,float16,float16,0,0.01921066641807556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,0,1,float16,float16,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,128,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,128,1,fp8,fp8,0,0.01852799952030182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,0,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,128,1,float16,float16,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,128,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,128,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,0,1,float16,fp8,0,0.019354666272799175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,0,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,128,1,float16,float16,0,0.01899733394384384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,128,1,float16,fp8,0,0.018826667219400406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,0,1,float16,float16,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,128,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,0,1,float16,fp8,0,0.01876266673207283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,128,1,float16,float16,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,0,1,float16,float16,0,0.017866666118303936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,128,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,128,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,0,1,float16,fp8,0,0.018709332992633183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,128,1,float16,float16,0,0.018058666338523228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,128,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,128,1,fp8,fp8,0,0.018058666338523228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,128,1,float16,float16,0,0.018794666975736618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,0,1,float16,fp8,0,0.018810667097568512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,128,1,float16,float16,0,0.018976000448067982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,0,1,float16,float16,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,128,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,128,1,float16,float16,0,0.018725333114465077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,0,1,float16,float16,0,0.019141333798567455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,128,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,128,1,fp8,fp8,0,0.01762666677435239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,128,1,float16,float16,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,128,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,128,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,128,1,float16,float16,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,0,1,float16,float16,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,128,1,fp8,fp8,0,0.017509333789348602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,128,1,float16,float16,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,128,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,128,1,float16,float16,0,0.017610666652520496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,128,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,0,1,float16,fp8,0,0.017573333034912746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,128,1,float16,float16,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,128,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,0,1,float16,fp8,0,0.017717332889636356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,0,1,fp8,fp8,0,0.015546667079130808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,128,1,float16,float16,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,128,1,fp8,fp8,0,0.016751999656359356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,0,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,128,1,float16,float16,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,0,1,float16,float16,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,128,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,0,1,fp8,fp8,0,0.016751999656359356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,128,1,float16,float16,0,0.2044586737950643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,0,1,float16,float16,0,0.2055466572443644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,128,1,float16,fp8,0,0.20602132876714072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,128,1,fp8,fp8,0,0.18683199087778726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,0,1,float16,fp8,0,0.20408533016840616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,0,1,fp8,fp8,0,0.1883359948794047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,128,1,float16,float16,0,0.2037973403930664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,0,1,float16,float16,0,0.20567466815312704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,128,1,fp8,fp8,0,0.18942399819691977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,128,1,float16,fp8,0,0.20534400145212808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,0,1,float16,fp8,0,0.20466667413711548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,0,1,fp8,fp8,0,0.18961066007614136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,0,1,float16,float16,0,0.20599466562271118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,128,1,float16,float16,0,0.2059839963912964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,128,1,float16,fp8,0,0.20542933543523154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,128,1,fp8,fp8,0,0.19200533628463745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,0,1,float16,fp8,0,0.20487467447916666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,0,1,fp8,fp8,0,0.1921493411064148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,128,1,float16,float16,0,0.20651199420293173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,0,1,float16,float16,0,0.2075786590576172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,128,1,float16,fp8,0,0.20683733622233072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,128,1,fp8,fp8,0,0.19717333714167276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,0,1,float16,fp8,0,0.20669333140055338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,128,1,float16,float16,0,0.11771200100580852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,0,1,fp8,fp8,0,0.1968266765276591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,0,1,float16,float16,0,0.1181706686814626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,128,1,float16,fp8,0,0.11771733562151591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,128,1,fp8,fp8,0,0.11598400274912517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,0,1,float16,fp8,0,0.1179146667321523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,0,1,fp8,fp8,0,0.11589866876602173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,128,1,float16,float16,0,0.11124266187349956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,0,1,float16,float16,0,0.1099679966767629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,128,1,float16,fp8,0,0.11157866319020589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,128,1,fp8,fp8,0,0.10125866532325745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,0,1,float16,fp8,0,0.1104213297367096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,0,1,fp8,fp8,0,0.10110400120417277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,128,1,float16,float16,0,0.10993599891662598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,0,1,float16,float16,0,0.11150933305422465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,128,1,float16,fp8,0,0.10965866843859355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,128,1,fp8,fp8,0,0.10126399993896484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,0,1,float16,fp8,0,0.10950932900110881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,0,1,fp8,fp8,0,0.10215999682744344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,128,1,float16,float16,0,0.11146666606267293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,128,1,float16,fp8,0,0.11009066303571065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,0,1,float16,float16,0,0.11084266503651936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,128,1,fp8,fp8,0,0.10248532891273499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,0,1,float16,fp8,0,0.11148266990979512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,0,1,fp8,fp8,0,0.10143466790517171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,128,1,float16,float16,0,0.11126933495203654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,0,1,float16,float16,0,0.11158399780591328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,128,1,float16,fp8,0,0.11157866319020589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,128,1,fp8,fp8,0,0.1051626702149709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,0,1,float16,fp8,0,0.1114026705423991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,0,1,fp8,fp8,0,0.10471999645233154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,128,1,float16,float16,0,0.06492266555627187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,0,1,float16,float16,0,0.0662773350874583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,128,1,float16,fp8,0,0.06639466683069865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,128,1,fp8,fp8,0,0.06283733248710632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,0,1,float16,fp8,0,0.06449066599210103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,0,1,fp8,fp8,0,0.06367999811967213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,128,1,float16,float16,0,0.06429333488146464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,0,1,float16,float16,0,0.06426133215427399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,128,1,float16,fp8,0,0.06297066807746887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,128,1,fp8,fp8,0,0.05797866483529409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,0,1,float16,fp8,0,0.06317333380381267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,0,1,fp8,fp8,0,0.05839466551939646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,128,1,float16,float16,0,0.06428266565004985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,0,1,float16,float16,0,0.062458669145902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,128,1,float16,fp8,0,0.0637066662311554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,128,1,fp8,fp8,0,0.060218666990598045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,0,1,float16,fp8,0,0.06473599870999654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,0,1,fp8,fp8,0,0.058186665177345276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,128,1,float16,float16,0,0.0643146683772405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,0,1,float16,float16,0,0.06431999802589417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,128,1,float16,fp8,0,0.0645066648721695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,128,1,fp8,fp8,0,0.058186665177345276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,0,1,float16,fp8,0,0.06427200138568878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,0,1,fp8,fp8,0,0.060191998879114784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,128,1,float16,float16,0,0.06438399851322174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,0,1,float16,float16,0,0.06448533137639363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,128,1,float16,fp8,0,0.06406400104363759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,128,1,fp8,fp8,0,0.06006399790445963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,0,1,float16,fp8,0,0.06353599826494853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,128,1,float16,float16,0,0.041536000867684685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,0,1,fp8,fp8,0,0.060191998879114784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,0,1,float16,float16,0,0.04095999896526337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,128,1,float16,fp8,0,0.03998400022586187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,128,1,fp8,fp8,0,0.039493332306543984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,0,1,float16,fp8,0,0.0397173340121905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,0,1,fp8,fp8,0,0.03948266555865606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,128,1,float16,float16,0,0.039813332259655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,0,1,float16,float16,0,0.039813332259655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,128,1,float16,fp8,0,0.039461334546407066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,128,1,fp8,fp8,0,0.03783999880154928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,0,1,float16,fp8,0,0.03974399964014689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,0,1,fp8,fp8,0,0.03799466788768768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,128,1,float16,float16,0,0.040005333721637726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,0,1,float16,float16,0,0.039493332306543984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,128,1,float16,fp8,0,0.03988266736268997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,128,1,fp8,fp8,0,0.03917866696914037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,0,1,float16,fp8,0,0.03976000100374222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,0,1,fp8,fp8,0,0.039408000806967415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,128,1,float16,float16,0,0.03976533313592275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,0,1,float16,float16,0,0.040250666439533234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,128,1,float16,fp8,0,0.04178133110205332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,128,1,fp8,fp8,0,0.03842133283615112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,0,1,float16,fp8,0,0.03955200066169103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,0,1,fp8,fp8,0,0.03993066648642222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,0,1,float16,float16,0,0.03950933367013931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,128,1,float16,float16,0,0.0417546679576238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,128,1,float16,fp8,0,0.04182933270931244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,128,1,fp8,fp8,0,0.039488000174363456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,0,1,float16,fp8,0,0.04200533529122671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,0,1,fp8,fp8,0,0.039093332986036934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,128,1,float16,float16,0,0.03291733314593633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,0,1,float16,float16,0,0.029466666281223297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,128,1,float16,fp8,0,0.029530666768550873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,128,1,fp8,fp8,0,0.029445332785447437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,0,1,float16,fp8,0,0.029317334294319153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,0,1,fp8,fp8,0,0.02940800040960312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,128,1,float16,float16,0,0.02887466549873352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,128,1,float16,fp8,0,0.029071999092896778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,0,1,float16,float16,0,0.028079998989899952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,128,1,fp8,fp8,0,0.027232001225153606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,0,1,float16,fp8,0,0.027317332724730175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,0,1,fp8,fp8,0,0.0272533322374026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,128,1,float16,float16,0,0.029189333319664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,128,1,float16,fp8,0,0.027509334186712902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,128,1,fp8,fp8,0,0.027109332382678986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,0,1,float16,fp8,0,0.02739733209212621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,0,1,fp8,fp8,0,0.027029333015282948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,128,1,float16,float16,0,0.027242665489514668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,0,1,float16,float16,0,0.027269333600997925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,128,1,float16,fp8,0,0.029306667546431225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,128,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,0,1,float16,fp8,0,0.02938666691382726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,0,1,fp8,fp8,0,0.02787200113137563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,128,1,float16,float16,0,0.02941333254178365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,0,1,float16,float16,0,0.029290666182835896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,128,1,float16,fp8,0,0.029653333127498627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,128,1,fp8,fp8,0,0.027386667827765148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,0,1,float16,fp8,0,0.029466666281223297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,0,1,fp8,fp8,0,0.029359998802344005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,128,1,float16,float16,0,0.021114667256673176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,0,1,float16,float16,0,0.021168000996112823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,128,1,float16,fp8,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,128,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,0,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,0,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,128,1,float16,float16,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,128,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,128,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,0,1,fp8,fp8,0,0.02144533395767212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,128,1,float16,float16,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,0,1,float16,float16,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,128,1,float16,fp8,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,128,1,fp8,fp8,0,0.01966399947802226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,0,1,float16,fp8,0,0.02147199958562851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,0,1,fp8,fp8,0,0.019871999820073444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,128,1,float16,float16,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,128,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,128,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,0,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,0,1,fp8,fp8,0,0.020314666132132213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,128,1,float16,float16,0,0.02142400046189626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,0,1,float16,float16,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,128,1,float16,fp8,0,0.021418665846188862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,128,1,fp8,fp8,0,0.021290667355060577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,0,1,float16,fp8,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,0,1,fp8,fp8,0,0.02143466720978419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,128,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,0,1,float16,float16,0,0.017535999417304993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,128,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,128,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,0,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,128,1,float16,float16,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,128,1,float16,fp8,0,0.017759999881188076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,128,1,fp8,fp8,0,0.01773333301146825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,0,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,0,1,float16,float16,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,128,1,float16,fp8,0,0.017525333911180496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,0,1,float16,fp8,0,0.019093333433071773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,0,1,fp8,fp8,0,0.017498667041460674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,128,1,float16,float16,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,0,1,fp8,fp8,0,0.017840000490347546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,128,1,float16,float16,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,128,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,128,1,float16,fp8,0,0.01960533360640208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,128,1,float16,float16,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,128,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,0,1,float16,fp8,0,0.01762666677435239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,128,1,float16,float16,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,0,1,float16,float16,0,0.01568000018596649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,0,1,float16,fp8,0,0.017445333302021027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,128,1,float16,float16,0,0.01579733317097028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,128,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,128,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,128,1,float16,float16,0,0.017386666188637417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,128,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,128,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,0,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,128,1,float16,float16,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,0,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,0,1,float16,float16,0,0.01669866715868314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,128,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,0,1,float16,fp8,0,0.01752000053723653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,128,1,float16,float16,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,128,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,128,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,0,1,fp8,fp8,0,0.015589332828919092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,128,1,float16,fp8,0,0.021920000513394673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,0,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,0,1,float16,float16,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,0,1,float16,float16,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,128,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,128,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,128,1,float16,float16,0,0.16296000281969705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,0,1,float16,float16,0,0.16294399897257486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,128,1,float16,fp8,0,0.16249066591262817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,128,1,fp8,fp8,0,0.14761599898338318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,0,1,float16,fp8,0,0.16250133514404297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,0,1,fp8,fp8,0,0.1464853286743164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,128,1,float16,float16,0,0.16423466801643372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,0,1,float16,float16,0,0.16235733032226562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,128,1,float16,fp8,0,0.16286933422088623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,128,1,fp8,fp8,0,0.14819199840227762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,0,1,float16,fp8,0,0.1634666621685028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,0,1,fp8,fp8,0,0.14856533209482828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,128,1,float16,float16,0,0.1648426651954651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,0,1,float16,float16,0,0.16352533300717673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,128,1,float16,fp8,0,0.16309866309165955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,128,1,fp8,fp8,0,0.14727466305096945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,0,1,float16,fp8,0,0.16405866543451944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,0,1,fp8,fp8,0,0.14869866768519083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,128,1,float16,float16,0,0.16475199659665427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,0,1,float16,float16,0,0.164410670598348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,128,1,float16,fp8,0,0.16382400194803873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,128,1,fp8,fp8,0,0.15152532855669656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,0,1,float16,fp8,0,0.1628213326136271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,128,1,float16,float16,0,0.09175466497739156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,0,1,fp8,fp8,0,0.1508853336175283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,0,1,float16,float16,0,0.09107733766237895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,128,1,float16,fp8,0,0.09073600172996521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,128,1,fp8,fp8,0,0.08540800213813782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,0,1,float16,fp8,0,0.09099733829498291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,0,1,fp8,fp8,0,0.08499733606974284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,128,1,float16,float16,0,0.08914132912953694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,0,1,float16,float16,0,0.08885866403579712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,128,1,float16,fp8,0,0.08897067109743755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,128,1,fp8,fp8,0,0.08243733147780101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,0,1,float16,fp8,0,0.08880000313123067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,0,1,fp8,fp8,0,0.082805335521698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,128,1,float16,float16,0,0.08925333619117737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,0,1,float16,float16,0,0.08901333808898926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,128,1,float16,fp8,0,0.08905067046483357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,128,1,fp8,fp8,0,0.08195200065771739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,0,1,float16,fp8,0,0.08872532844543457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,0,1,fp8,fp8,0,0.08121066788832347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,128,1,float16,float16,0,0.08927466471989949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,0,1,float16,float16,0,0.0890880028406779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,128,1,float16,fp8,0,0.08916266759236653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,128,1,fp8,fp8,0,0.08294933537642162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,0,1,float16,fp8,0,0.08872000376383464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,0,1,fp8,fp8,0,0.082805335521698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,128,1,float16,float16,0,0.09039466579755147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,0,1,float16,float16,0,0.08893332878748576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,128,1,float16,fp8,0,0.0897706647713979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,0,1,float16,fp8,0,0.08907199899355571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,128,1,fp8,fp8,0,0.08307200173536937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,0,1,fp8,fp8,0,0.08306666711966197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,128,1,float16,float16,0,0.05428266525268555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,0,1,float16,float16,0,0.05436799923578898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,128,1,float16,fp8,0,0.0540533314148585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,0,1,float16,fp8,0,0.053557331363360085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,128,1,fp8,fp8,0,0.05136533578236898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,0,1,fp8,fp8,0,0.04980266590913137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,128,1,float16,float16,0,0.05227200190226237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,128,1,float16,fp8,0,0.05193066596984863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,0,1,float16,float16,0,0.05237866441408793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,128,1,fp8,fp8,0,0.04863466819127401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,0,1,float16,fp8,0,0.05189333359400431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,0,1,fp8,fp8,0,0.05072000126043955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,128,1,float16,float16,0,0.05242133140563965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,0,1,float16,float16,0,0.05194133520126343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,128,1,fp8,fp8,0,0.04990399877230326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,128,1,float16,fp8,0,0.052101333936055504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,0,1,float16,fp8,0,0.052144000927607216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,0,1,fp8,fp8,0,0.049925332268079124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,128,1,float16,float16,0,0.053472002347310386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,0,1,float16,float16,0,0.05425066749254862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,128,1,float16,fp8,0,0.05373333394527435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,128,1,fp8,fp8,0,0.05051200091838837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,0,1,float16,fp8,0,0.0540533314148585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,128,1,float16,float16,0,0.05208000044027964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,0,1,fp8,fp8,0,0.051072001457214355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,0,1,float16,float16,0,0.05394133428732554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,128,1,float16,fp8,0,0.05190399785836538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,128,1,fp8,fp8,0,0.050250664353370667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,0,1,float16,fp8,0,0.05203199883302053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,128,1,float16,float16,0,0.03562666724125544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,0,1,fp8,fp8,0,0.049973333875338234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,0,1,float16,float16,0,0.035487999518712364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,128,1,float16,fp8,0,0.035786665976047516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,128,1,fp8,fp8,0,0.033439998825391136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,0,1,float16,fp8,0,0.0359199990828832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,0,1,fp8,fp8,0,0.03547733277082443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,128,1,float16,float16,0,0.03555200000603994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,0,1,float16,float16,0,0.03558400024970373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,128,1,float16,fp8,0,0.03543466577927271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,0,1,float16,fp8,0,0.03521066655715307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,128,1,fp8,fp8,0,0.03405333310365677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,0,1,fp8,fp8,0,0.03342933456103007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,128,1,float16,float16,0,0.034389334420363106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,0,1,float16,float16,0,0.035386666655540466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,128,1,float16,fp8,0,0.03544000039498011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,128,1,fp8,fp8,0,0.03366400053103765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,0,1,float16,fp8,0,0.03572266548871994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,0,1,fp8,fp8,0,0.033215999603271484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,128,1,float16,float16,0,0.033770665526390076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,0,1,float16,float16,0,0.03420799970626831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,128,1,float16,fp8,0,0.035589332381884255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,0,1,float16,fp8,0,0.035530666510264076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,0,1,fp8,fp8,0,0.03332799921433131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,128,1,fp8,fp8,0,0.03402133285999298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,128,1,float16,float16,0,0.035973332822322845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,0,1,float16,float16,0,0.03572266548871994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,128,1,float16,fp8,0,0.035589332381884255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,0,1,float16,fp8,0,0.03570666660865148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,128,1,fp8,fp8,0,0.03596800069014231
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,0,1,fp8,fp8,0,0.03562133262554804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,128,1,float16,float16,0,0.02334933231274287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,0,1,float16,float16,0,0.025221332907676697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,128,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,128,1,float16,fp8,0,0.027717334528764088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,0,1,fp8,fp8,0,0.02314666658639908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,128,1,float16,float16,0,0.02319466571013133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,0,1,float16,float16,0,0.023418667415777843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,128,1,fp8,fp8,0,0.02292799949645996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,128,1,float16,fp8,0,0.023610666394233704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,0,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,0,1,fp8,fp8,0,0.021717332303524017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,128,1,float16,float16,0,0.023082666099071503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,0,1,float16,float16,0,0.023904000719388325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,128,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,128,1,fp8,fp8,0,0.022991999983787537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,0,1,float16,fp8,0,0.02401600033044815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,0,1,fp8,fp8,0,0.02311466634273529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,128,1,float16,float16,0,0.023792001108328503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,128,1,float16,fp8,0,0.024725332856178284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,0,1,float16,float16,0,0.02365333338578542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,128,1,fp8,fp8,0,0.023247999449570973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,0,1,float16,fp8,0,0.025034666061401367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,128,1,float16,float16,0,0.02333866556485494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,0,1,fp8,fp8,0,0.02366400013367335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,0,1,float16,float16,0,0.023557332654794056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,128,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,128,1,fp8,fp8,0,0.0235359991590182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,0,1,float16,fp8,0,0.023445333043734234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,0,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,128,1,float16,float16,0,0.021509334444999695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,0,1,float16,float16,0,0.0210506667693456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,128,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,128,1,fp8,fp8,0,0.02162133405605952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,0,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,0,1,fp8,fp8,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,128,1,float16,float16,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,0,1,float16,float16,0,0.0210506667693456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,128,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,128,1,fp8,fp8,0,0.01945066700379054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,128,1,float16,float16,0,0.01971199984351794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,128,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,128,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,0,1,float16,fp8,0,0.01960533360640208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,0,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,128,1,float16,float16,0,0.019120000302791595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,128,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,0,1,float16,float16,0,0.02128533273935318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,0,1,fp8,fp8,0,0.01958400011062622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,128,1,float16,float16,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,128,1,float16,fp8,0,0.019466667125622433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,128,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,0,1,float16,fp8,0,0.019434666881958645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,0,1,fp8,fp8,0,0.019621333728233974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,128,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,128,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,128,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,128,1,float16,float16,0,0.016794666647911072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,128,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,128,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,0,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,128,1,float16,float16,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,0,1,fp8,fp8,0,0.015568000574906668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,128,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,128,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,0,1,float16,fp8,0,0.01764800027012825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,128,1,float16,float16,0,0.01664000004529953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,128,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,0,1,fp8,fp8,0,0.016202667107184727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,0,1,float16,float16,0,0.016106666376193363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,128,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,128,1,fp8,fp8,0,0.016229332735141117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,0,1,fp8,fp8,0,0.016010666886965435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,128,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,128,1,float16,fp8,0,0.016783999900023144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,0,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,128,1,float16,float16,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,128,1,float16,fp8,0,0.01752000053723653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,128,1,float16,float16,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,0,1,float16,float16,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,128,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,0,1,fp8,fp8,0,0.01575999955336253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,128,1,float16,float16,0,0.01613333324591319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,128,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,128,1,float16,float16,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,0,1,float16,float16,0,0.017610666652520496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,128,1,float16,fp8,0,0.016490666816631954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,128,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,128,1,float16,float16,0,0.01863466699918111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,128,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,128,1,fp8,fp8,0,0.015429332852363586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,0,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,0,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,128,1,float16,float16,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,128,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,128,1,fp8,fp8,0,0.0161013330022494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,0,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,128,1,float16,float16,0,0.01578666642308235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,0,1,float16,float16,0,0.015829333414634068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,0,1,float16,fp8,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,128,1,float16,float16,0,0.14037866393725076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,0,1,float16,float16,0,0.14061333735783896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,128,1,float16,fp8,0,0.14007467031478882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,128,1,fp8,fp8,0,0.12966932853062949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,0,1,float16,fp8,0,0.1399733324845632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,0,1,fp8,fp8,0,0.12801067034403482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,128,1,float16,float16,0,0.14015466968218485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,0,1,float16,float16,0,0.139984001715978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,128,1,float16,fp8,0,0.14016000429789224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,128,1,fp8,fp8,0,0.12933866182963052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,0,1,fp8,fp8,0,0.1301866670449575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,0,1,float16,fp8,0,0.14005866646766663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,128,1,float16,float16,0,0.14019733667373657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,128,1,float16,fp8,0,0.1399893363316854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,0,1,float16,float16,0,0.14059199889500937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,128,1,fp8,fp8,0,0.12990400195121765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,0,1,float16,fp8,0,0.14044800400733948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,0,1,fp8,fp8,0,0.1297866702079773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,128,1,float16,float16,0,0.14101333419481912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,0,1,float16,float16,0,0.14106667041778564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,128,1,float16,fp8,0,0.1404159963130951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,128,1,fp8,fp8,0,0.12989333271980286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,0,1,float16,fp8,0,0.1402400036652883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,0,1,fp8,fp8,0,0.12956800063451132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,128,1,float16,float16,0,0.07853333155314128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,0,1,float16,float16,0,0.07879999776681264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,128,1,float16,fp8,0,0.0784746656815211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,128,1,fp8,fp8,0,0.07446933289368947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,0,1,float16,fp8,0,0.07866666714350383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,0,1,fp8,fp8,0,0.07430399954319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,128,1,float16,float16,0,0.07659199833869934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,0,1,float16,float16,0,0.0767680009206136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,128,1,float16,fp8,0,0.07667733232180278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,128,1,fp8,fp8,0,0.0724533349275589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,0,1,float16,fp8,0,0.07846400141716003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,0,1,fp8,fp8,0,0.0724373310804367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,128,1,float16,float16,0,0.07841599980990092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,0,1,float16,float16,0,0.07866666714350383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,128,1,float16,fp8,0,0.07871466875076294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,128,1,fp8,fp8,0,0.07259200016657512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,0,1,float16,fp8,0,0.07869866490364075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,0,1,fp8,fp8,0,0.07278400162855785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,128,1,float16,float16,0,0.07785599927107494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,128,1,float16,fp8,0,0.0788213312625885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,0,1,float16,float16,0,0.0786293347676595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,128,1,fp8,fp8,0,0.07250133156776428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,0,1,float16,fp8,0,0.07845866680145264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,0,1,fp8,fp8,0,0.07461866736412048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,128,1,float16,float16,0,0.0788800021012624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,0,1,float16,float16,0,0.07877333462238312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,128,1,float16,fp8,0,0.0783679982026418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,128,1,fp8,fp8,0,0.07450133562088013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,0,1,float16,fp8,0,0.07858133316040039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,128,1,float16,float16,0,0.048309331138928734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,0,1,fp8,fp8,0,0.07285333176453908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,0,1,float16,float16,0,0.04780266682306925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,128,1,float16,fp8,0,0.04804799954096476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,128,1,fp8,fp8,0,0.04799466828505198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,0,1,float16,fp8,0,0.048026666045188904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,0,1,fp8,fp8,0,0.046122665206591286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,128,1,float16,float16,0,0.04801600178082784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,0,1,float16,float16,0,0.04773333172003428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,128,1,float16,fp8,0,0.04798933366934458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,128,1,fp8,fp8,0,0.04566933214664459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,0,1,float16,fp8,0,0.04798933366934458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,0,1,fp8,fp8,0,0.046154667933781944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,128,1,float16,float16,0,0.047610665361086525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,0,1,float16,float16,0,0.04807466765244802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,128,1,float16,fp8,0,0.048170665899912514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,128,1,fp8,fp8,0,0.043951998154322304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,0,1,float16,fp8,0,0.04817600051561991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,0,1,fp8,fp8,0,0.04586666822433472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,128,1,float16,float16,0,0.04769066472848257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,0,1,float16,float16,0,0.04805333415667216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,128,1,float16,fp8,0,0.04781333108743032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,128,1,fp8,fp8,0,0.04571199913819631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,0,1,float16,fp8,0,0.048170665899912514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,0,1,fp8,fp8,0,0.04574400186538696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,128,1,float16,float16,0,0.047930667797724404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,0,1,float16,float16,0,0.0480373352766037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,128,1,float16,fp8,0,0.04781866570313772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,128,1,fp8,fp8,0,0.04631466666857401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,0,1,float16,fp8,0,0.04831466575463613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,0,1,fp8,fp8,0,0.04560000201066335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,128,1,float16,float16,0,0.03128000100453695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,0,1,float16,float16,0,0.031530665854612984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,128,1,float16,fp8,0,0.031856000423431396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,128,1,fp8,fp8,0,0.029509333272775013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,0,1,float16,fp8,0,0.03164266546567281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,0,1,fp8,fp8,0,0.031194667021433514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,128,1,float16,float16,0,0.02961066613594691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,0,1,float16,float16,0,0.03151999910672506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,128,1,float16,fp8,0,0.03030399978160858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,128,1,fp8,fp8,0,0.02922666569550832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,0,1,float16,fp8,0,0.03156266609827677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,0,1,fp8,fp8,0,0.029482667644818623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,128,1,float16,float16,0,0.029696000119050343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,0,1,float16,float16,0,0.029792000850041706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,128,1,float16,fp8,0,0.031301334500312805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,128,1,fp8,fp8,0,0.029578665892283123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,0,1,float16,fp8,0,0.030495998760064442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,0,1,fp8,fp8,0,0.029552000264326733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,128,1,float16,float16,0,0.03160000095764796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,0,1,float16,float16,0,0.03126933425664902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,128,1,float16,fp8,0,0.03143999973932902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,128,1,fp8,fp8,0,0.02937600016593933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,0,1,float16,fp8,0,0.031386665999889374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,0,1,fp8,fp8,0,0.029557332396507263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,128,1,float16,float16,0,0.02939733366171519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,0,1,float16,float16,0,0.029834667841593426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,128,1,float16,fp8,0,0.031285333136717476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,128,1,fp8,fp8,0,0.029370665550231934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,0,1,float16,fp8,0,0.03127466638882955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,0,1,fp8,fp8,0,0.029152000943819683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,128,1,float16,float16,0,0.023226665953795116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,0,1,float16,float16,0,0.021770666042963665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,128,1,float16,fp8,0,0.023007998863856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,128,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,0,1,float16,fp8,0,0.02298133323589961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,128,1,float16,float16,0,0.023120000958442688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,0,1,float16,float16,0,0.023423999547958374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,128,1,float16,fp8,0,0.02346133440732956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,128,1,fp8,fp8,0,0.022229333718617756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,0,1,float16,fp8,0,0.02327466756105423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,0,1,fp8,fp8,0,0.022597332795461018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,128,1,float16,float16,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,0,1,float16,float16,0,0.02143466720978419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,128,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,128,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,0,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,0,1,fp8,fp8,0,0.023103999594847362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,128,1,float16,float16,0,0.021365332106749218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,0,1,float16,float16,0,0.023354666928450268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,128,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,128,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,0,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,128,1,float16,float16,0,0.023130667706330616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,128,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,0,1,float16,float16,0,0.02325333406527837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,128,1,fp8,fp8,0,0.02310933421055476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,0,1,float16,fp8,0,0.02365333338578542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,0,1,fp8,fp8,0,0.023029332359631855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,128,1,float16,float16,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,0,1,float16,float16,0,0.019141333798567455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,128,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,128,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,0,1,float16,float16,0,0.019152000546455383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,128,1,float16,fp8,0,0.019386666516462963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,0,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,0,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,128,1,float16,float16,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,0,1,float16,float16,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,128,1,fp8,fp8,0,0.019472000499566395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,128,1,float16,fp8,0,0.01966399947802226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,0,1,float16,fp8,0,0.019440000255902607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,128,1,float16,float16,0,0.01894933357834816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,128,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,128,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,0,1,float16,float16,0,0.019695999721686046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,0,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,0,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,128,1,float16,float16,0,0.018976000448067982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,0,1,float16,float16,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,128,1,float16,fp8,0,0.01958400011062622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,128,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,0,1,float16,fp8,0,0.019461333751678467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,0,1,fp8,fp8,0,0.01982933282852173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,128,1,fp8,fp8,0,0.0162773331006368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,128,1,float16,float16,0,0.015360000232855478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,0,1,float16,float16,0,0.015418666104475657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,128,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,0,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,128,1,float16,float16,0,0.015504000087579092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,128,1,fp8,fp8,0,0.01749333366751671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,128,1,float16,float16,0,0.016773333152135212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,128,1,fp8,fp8,0,0.016458666572968166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,128,1,float16,float16,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,128,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,128,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,0,1,float16,float16,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,128,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,128,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,128,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,0,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,0,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,128,1,float16,float16,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,128,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,0,1,float16,fp8,0,0.015674666812022526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,0,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,128,1,float16,float16,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,128,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,0,1,float16,fp8,0,0.01609066625436147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,128,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,128,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,128,1,float16,fp8,0,0.016762666404247284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,128,1,fp8,fp8,0,0.015461333096027374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,0,1,float16,float16,0,0.01492799942692121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,128,1,fp8,fp8,0,0.016704000532627106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,0,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,64,128,1,float16,float16,0,0.12177600463231404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,64,0,1,float16,float16,0,0.12178666392962138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,64,128,1,float16,fp8,0,0.12195199728012085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,64,128,1,fp8,fp8,0,0.11160000165303548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,64,0,1,float16,fp8,0,0.11973866820335388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,64,0,1,fp8,fp8,0,0.11183999975522359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,64,128,1,float16,float16,0,0.12059199810028076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,64,0,1,float16,float16,0,0.1200320025285085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,64,128,1,float16,fp8,0,0.12185066938400269
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,64,0,1,fp8,fp8,0,0.11134399970372517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,64,0,1,float16,fp8,0,0.11994666854540507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,64,128,1,fp8,fp8,0,0.11136000355084737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,64,128,1,float16,float16,0,0.12008532881736755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,64,0,1,float16,float16,0,0.12026133139928182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,64,128,1,float16,fp8,0,0.11973333358764648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,64,128,1,fp8,fp8,0,0.11196800072987874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,64,0,1,fp8,fp8,0,0.11149332920710246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,64,0,1,float16,fp8,0,0.12216533223787944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,64,128,1,float16,float16,0,0.12179733316103618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,64,0,1,float16,float16,0,0.12175466616948445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,64,128,1,float16,fp8,0,0.12196800112724304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,64,128,1,fp8,fp8,0,0.11157332857449849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,64,0,1,float16,fp8,0,0.12176000078519185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,64,0,1,fp8,fp8,0,0.11147733529408772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,64,128,1,float16,float16,0,0.06843199829260509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,0,0.06835733354091644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,64,128,1,float16,fp8,0,0.06846400101979573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,64,128,1,fp8,fp8,0,0.06233599781990051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,0,0.06835733354091644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,64,0,1,fp8,fp8,0,0.0625493327776591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,64,128,1,float16,float16,0,0.06798399984836578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,64,0,1,float16,float16,0,0.06666666766007741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,64,128,1,float16,fp8,0,0.06816000243028005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,64,128,1,fp8,fp8,0,0.06383466720581055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,64,0,1,float16,fp8,0,0.06811733543872833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,64,0,1,fp8,fp8,0,0.06229333579540253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,64,128,1,float16,float16,0,0.06854933500289917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,64,0,1,float16,float16,0,0.0663679987192154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,64,128,1,float16,fp8,0,0.06825066606203715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,64,128,1,fp8,fp8,0,0.06423999865849812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,64,0,1,float16,fp8,0,0.0683786670366923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,64,0,1,fp8,fp8,0,0.06417599817117055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,64,128,1,float16,float16,0,0.06875200072924297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,64,0,1,float16,float16,0,0.06829333305358887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,64,128,1,float16,fp8,0,0.06845866640408833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,64,128,1,fp8,fp8,0,0.06247466802597046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,64,0,1,float16,fp8,0,0.06821333368619283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,64,0,1,fp8,fp8,0,0.062165334820747375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,64,128,1,float16,float16,0,0.0681333343187968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,64,0,1,float16,float16,0,0.06723199784755707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,64,128,1,float16,fp8,0,0.06840533514817555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,64,128,1,fp8,fp8,0,0.06249066690603892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,64,128,1,float16,float16,0,0.04161066561937332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,64,0,1,fp8,fp8,0,0.0642133355140686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,64,0,1,float16,fp8,0,0.06901866694291432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,0,0.04178666571776072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,64,128,1,float16,fp8,0,0.043434664607048035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,64,128,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,64,0,1,fp8,fp8,0,0.03976533313592275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,0,0.04450133442878723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,64,128,1,float16,float16,0,0.04191466669241587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,64,0,1,float16,float16,0,0.04162666698296865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,64,128,1,float16,fp8,0,0.04178133110205332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,64,128,1,fp8,fp8,0,0.039808000127474465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,64,0,1,fp8,fp8,0,0.04005333284536997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,64,0,1,float16,fp8,0,0.043231998880704246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,64,128,1,float16,float16,0,0.04187199970086416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,64,0,1,float16,float16,0,0.04161066561937332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,64,128,1,float16,fp8,0,0.041696002086003624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,64,128,1,fp8,fp8,0,0.04051200052102407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,64,0,1,float16,fp8,0,0.04171200096607208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,64,0,1,fp8,fp8,0,0.03953066716591517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,64,128,1,float16,float16,0,0.04155733436346054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,64,0,1,float16,float16,0,0.04221866528193156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,64,128,1,float16,fp8,0,0.04394666850566864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,64,128,1,fp8,fp8,0,0.03963200002908707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,64,0,1,float16,fp8,0,0.041989331444104515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,64,128,1,float16,float16,0,0.04177066683769226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,64,0,1,float16,float16,0,0.041834667325019836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,64,128,1,float16,fp8,0,0.041706666350364685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,64,0,1,fp8,fp8,0,0.0403466671705246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,64,128,1,fp8,fp8,0,0.039893334110577904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,64,0,1,float16,fp8,0,0.04362666606903076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,64,0,1,fp8,fp8,0,0.039664000272750854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,64,128,1,float16,float16,0,0.02959999938805898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,0,0.02951466788848241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,64,128,1,float16,fp8,0,0.029509333272775013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,64,128,1,fp8,fp8,0,0.02733866622050603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,0,0.02942399928967158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,64,0,1,fp8,fp8,0,0.027141332626342773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,64,128,1,float16,float16,0,0.029285334050655365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,64,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,64,128,1,float16,fp8,0,0.029509333272775013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,64,128,1,fp8,fp8,0,0.027402666707833607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,64,0,1,float16,fp8,0,0.029418667157491047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,64,0,1,fp8,fp8,0,0.027232001225153606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,64,128,1,float16,float16,0,0.02977599948644638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,64,0,1,float16,float16,0,0.029103999336560566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,64,128,1,float16,fp8,0,0.029626667499542236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,64,128,1,fp8,fp8,0,0.02754666656255722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,64,0,1,float16,fp8,0,0.02956266701221466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,64,0,1,fp8,fp8,0,0.02720533311367035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,64,128,1,float16,float16,0,0.031680000325044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,64,0,1,float16,float16,0,0.029311999678611755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,64,128,1,float16,fp8,0,0.029264000554879505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,64,128,1,fp8,fp8,0,0.02714666724205017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,64,0,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,64,0,1,fp8,fp8,0,0.02938666691382726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,64,128,1,float16,float16,0,0.027589333554108936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,64,0,1,float16,float16,0,0.029477333029111225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,64,128,1,float16,fp8,0,0.02934933453798294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,64,128,1,fp8,fp8,0,0.02738133321205775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,64,128,1,float16,float16,0,0.021359999974568684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,64,0,1,fp8,fp8,0,0.027471999327341717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,64,0,1,float16,fp8,0,0.030042665700117748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,0,0.021365332106749218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,64,128,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,64,128,1,fp8,fp8,0,0.02149333308140437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,0,0.022826666633288067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,64,0,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,64,128,1,float16,float16,0,0.023472001155217487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,64,0,1,float16,float16,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,64,128,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,64,128,1,fp8,fp8,0,0.020879998803138733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,64,0,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,64,0,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,64,128,1,float16,float16,0,0.02146133283774058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,64,0,1,float16,float16,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,64,128,1,float16,fp8,0,0.023610666394233704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,64,128,1,fp8,fp8,0,0.02089600016673406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,64,0,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,64,0,1,fp8,fp8,0,0.021418665846188862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,64,128,1,float16,float16,0,0.02142400046189626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,64,0,1,float16,float16,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,64,128,1,float16,fp8,0,0.02146133283774058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,64,128,1,fp8,fp8,0,0.021536000072956085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,64,0,1,float16,fp8,0,0.02306666721900304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,64,0,1,fp8,fp8,0,0.02147199958562851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,64,128,1,float16,float16,0,0.021402666966120403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,64,0,1,float16,float16,0,0.021935999393463135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,64,128,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,64,128,1,fp8,fp8,0,0.021477334201335907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,64,0,1,float16,fp8,0,0.02309333284695943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,64,0,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,64,128,1,float16,float16,0,0.01952533299724261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,64,128,1,float16,fp8,0,0.019354666272799175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,64,128,1,fp8,fp8,0,0.01940800001223882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,64,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,64,128,1,float16,float16,0,0.018960000326236088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,64,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,64,128,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,64,128,1,fp8,fp8,0,0.019402666638294857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,64,0,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,64,0,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,64,128,1,float16,float16,0,0.019482667247454327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,64,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,64,128,1,float16,fp8,0,0.019600000232458115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,64,128,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,64,0,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,64,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,64,128,1,float16,float16,0,0.019381333142518997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,64,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,64,128,1,float16,fp8,0,0.02057066683967908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,64,128,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,64,0,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,64,0,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,64,128,1,float16,float16,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,64,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,64,128,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,64,128,1,fp8,fp8,0,0.019578666736682255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,64,0,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,64,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,64,128,1,float16,fp8,0,0.016714667280515034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,64,0,1,fp8,fp8,0,0.015599999576807022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,64,128,1,float16,float16,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,64,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,64,128,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,64,0,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,64,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,64,128,1,fp8,fp8,0,0.01588800052801768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,64,0,1,float16,fp8,0,0.016336000214020412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,64,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,64,128,1,float16,float16,0,0.01684800038735072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,64,128,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,64,0,1,fp8,fp8,0,0.015674666812022526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,64,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,64,128,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,64,128,1,float16,float16,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,64,128,1,float16,fp8,0,0.01597333326935768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,64,128,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,64,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,64,128,1,float16,float16,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,64,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,64,128,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,64,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,64,0,1,float16,float16,0,0.017509333789348602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,64,128,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,64,128,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,64,0,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,64,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,64,0,1,float16,float16,0,0.01568000018596649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,64,128,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,64,0,1,fp8,fp8,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,64,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,64,128,1,fp8,fp8,0,0.017498667041460674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,64,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,64,0,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,64,128,1,float16,float16,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,64,128,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,64,128,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,64,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,64,128,1,float16,float16,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,64,0,1,float16,float16,0,0.015664000064134598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,64,128,1,float16,fp8,0,0.01562133307258288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,64,128,1,fp8,fp8,0,0.015669333438078564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,64,128,1,float16,float16,0,0.014912000546852747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,64,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,64,128,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,64,128,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,64,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,64,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,64,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,64,0,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,64,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,64,128,1,float16,float16,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,64,128,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,64,0,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,64,0,1,fp8,fp8,0,0.01569066693385442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,128,1,float16,float16,0,0.881605307261149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,128,1,float16,fp8,0,0.8893333276112875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,128,1,fp8,fp8,0,0.8264640172322592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,128,1,float16,float16,0,0.8980586528778076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,0,1,float16,float16,0,5.4977067311604815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,0,1,float16,fp8,0,5.3524214426676435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,128,1,float16,fp8,0,0.9076693058013916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,0,1,fp8,fp8,0,4.974101384480794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,128,1,fp8,fp8,0,0.8453546365102133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,128,1,float16,float16,0,0.9145867029825846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,0,1,float16,float16,0,5.5811411539713545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,128,1,float16,fp8,0,0.9220960140228271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,0,1,fp8,fp8,0,4.991658528645833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,128,1,fp8,fp8,0,0.865109364191691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,0,1,float16,fp8,0,5.62283198038737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,128,1,float16,float16,0,0.9438666502634684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,0,1,float16,float16,0,5.381797154744466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,128,1,float16,fp8,0,0.9535253047943115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,128,1,fp8,fp8,0,0.9022986888885498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,0,1,fp8,fp8,0,5.012266794840495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,0,1,float16,fp8,0,5.387173334757487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,128,1,float16,float16,0,0.5276000102361044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,0,1,float16,float16,0,5.642058690388997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,128,1,float16,fp8,0,0.572271982828776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,128,1,fp8,fp8,0,0.5102560122807821
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,0,1,float16,float16,0,3.0316747029622397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,0,1,fp8,fp8,0,5.05026117960612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,0,1,float16,fp8,0,5.426266352335612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,128,1,float16,float16,0,0.4679146607716878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,128,1,float16,fp8,0,0.47175467014312744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,0,1,float16,fp8,0,2.8939574559529624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,0,1,fp8,fp8,0,2.633461316426595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,128,1,fp8,fp8,0,0.4413226842880249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,0,1,float16,float16,0,2.7607946395874023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,128,1,float16,float16,0,0.4726933240890503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,0,1,fp8,fp8,0,2.5642453829447427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,0,1,float16,fp8,0,2.8328800201416016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,128,1,float16,fp8,0,0.47807466983795166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,128,1,fp8,fp8,0,0.4472800095876058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,0,1,float16,float16,0,2.7695945103963218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,128,1,float16,float16,0,0.47969599564870197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,0,1,fp8,fp8,0,2.5689493815104165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,0,1,float16,fp8,0,2.8145974477132163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,128,1,float16,fp8,0,0.4862133264541626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,128,1,fp8,fp8,0,0.45578134059906006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,0,1,float16,float16,0,2.8263572057088218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,128,1,float16,float16,0,0.4938773314158122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,128,1,float16,fp8,0,0.5024053255716959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,0,1,fp8,fp8,0,2.576906681060791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,0,1,float16,fp8,0,2.92138671875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,128,1,fp8,fp8,0,0.47386666138966876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,128,1,float16,float16,0,0.2958666682243347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,0,1,float16,float16,0,2.7828801472981772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,128,1,float16,fp8,0,0.30481600761413574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,128,1,fp8,fp8,0,0.291701336701711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,0,1,float16,float16,0,1.4927679697672527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,0,1,fp8,fp8,0,2.5950986544291177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,0,1,float16,fp8,0,2.865237236022949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,128,1,float16,float16,0,0.26662399371465045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,128,1,float16,fp8,0,0.2676053245862325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,0,1,float16,fp8,0,1.5021920204162598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,0,1,fp8,fp8,0,1.400309403737386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,0,1,float16,float16,0,1.4579092661539714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,128,1,fp8,fp8,0,0.25494933128356934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,128,1,float16,float16,0,0.26953067382176715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,128,1,float16,fp8,0,0.27135467529296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,0,1,float16,fp8,0,1.4600106875101726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,0,1,fp8,fp8,0,1.3643786112467449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,0,1,float16,float16,0,1.4606560071309407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,128,1,fp8,fp8,0,0.2587466637293498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,128,1,float16,float16,0,0.275221327940623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,128,1,float16,fp8,0,0.2772853374481201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,0,1,fp8,fp8,0,1.366879940032959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,0,1,float16,fp8,0,1.464255968729655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,0,1,float16,float16,0,1.4665385882059734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,128,1,fp8,fp8,0,0.26309333244959515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,128,1,float16,float16,0,0.2804479996363322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,0,1,fp8,fp8,0,1.3723360697428386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,0,1,float16,fp8,0,1.4700533548990886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,128,1,float16,fp8,0,0.28575466076533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,0,1,float16,float16,0,1.4746185938517253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,128,1,fp8,fp8,0,0.2733760078748067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,128,1,float16,float16,0,0.2097760041554769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,128,1,float16,fp8,0,0.2125706672668457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,0,1,float16,fp8,0,1.4805493354797363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,0,1,float16,float16,0,0.865834633509318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,0,1,fp8,fp8,0,1.3791146278381348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,128,1,fp8,fp8,0,0.2015999952952067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,128,1,float16,float16,0,0.20634132623672485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,0,1,fp8,fp8,0,0.8045547008514404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,128,1,float16,fp8,0,0.20757333437601724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,0,1,float16,fp8,0,0.865781307220459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,0,1,float16,float16,0,0.8567466735839844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,128,1,fp8,fp8,0,0.19758933782577515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,128,1,float16,float16,0,0.2056480050086975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,0,1,fp8,fp8,0,0.8017333348592123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,0,1,float16,fp8,0,0.8573919932047526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,128,1,float16,fp8,0,0.20784533023834229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,128,1,fp8,fp8,0,0.19750400384267172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,0,1,float16,float16,0,0.8601919809977213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,128,1,float16,float16,0,0.20612800121307373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,0,1,float16,fp8,0,0.8582932949066162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,0,1,fp8,fp8,0,0.8023359775543213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,128,1,float16,fp8,0,0.20570667584737143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,0,1,float16,float16,0,0.861407995223999
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,128,1,fp8,fp8,0,0.19940799474716187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,128,1,float16,float16,0,0.2097760041554769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,0,1,fp8,fp8,0,0.802191972732544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,0,1,float16,fp8,0,0.858890692392985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,128,1,float16,fp8,0,0.20920000473658243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,0,1,float16,float16,0,0.8617920080820719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,128,1,fp8,fp8,0,0.19960000117619833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,0,1,float16,fp8,0,0.8621919949849447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,128,1,float16,float16,0,0.6641120115915934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,0,1,fp8,fp8,0,0.8022613525390625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,128,1,fp8,fp8,0,0.6201226711273193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,128,1,float16,fp8,0,0.6693759759267172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,128,1,float16,float16,0,0.6705973148345947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,0,1,float16,float16,0,3.396005312601725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,0,1,float16,fp8,0,3.282965342203776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,128,1,float16,fp8,0,0.6952640215555826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,0,1,fp8,fp8,0,2.9491678873697915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,128,1,fp8,fp8,0,0.6316853364308676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,128,1,float16,float16,0,0.6835467020670573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,0,1,float16,float16,0,3.374272028605143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,128,1,float16,fp8,0,0.6893119812011719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,0,1,fp8,fp8,0,2.960757255554199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,0,1,float16,fp8,0,3.492821375528971
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,128,1,fp8,fp8,0,0.647482673327128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,128,1,float16,float16,0,0.7038613160451254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,0,1,float16,float16,0,3.192511876424154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,128,1,float16,fp8,0,0.7146133581797282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,0,1,float16,fp8,0,3.197098731994629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,0,1,fp8,fp8,0,2.977205276489258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,128,1,fp8,fp8,0,0.6752639611562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,0,1,float16,float16,0,3.261098543802897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,128,1,float16,float16,0,0.40115201473236084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,0,1,fp8,fp8,0,3.0058345794677734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,0,1,float16,fp8,0,3.356069246927897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,0,1,float16,float16,0,1.6989332834879558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,128,1,float16,fp8,0,0.40963200728098553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,128,1,fp8,fp8,0,0.38911465803782147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,128,1,float16,float16,0,0.35580265522003174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,0,1,float16,fp8,0,1.7262719472249348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,0,1,fp8,fp8,0,1.590127944946289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,128,1,float16,fp8,0,0.36191999912261963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,128,1,fp8,fp8,0,0.3378346761067708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,0,1,float16,float16,0,1.646997292836507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,128,1,float16,float16,0,0.3622080087661743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,128,1,float16,fp8,0,0.36205331484476727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,0,1,float16,fp8,0,1.6505653063456218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,0,1,fp8,fp8,0,1.5397119522094727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,128,1,fp8,fp8,0,0.3415199915568034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,0,1,float16,float16,0,1.651253382364909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,128,1,float16,float16,0,0.3662666479746501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,128,1,float16,fp8,0,0.37092800935109455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,0,1,fp8,fp8,0,1.5430827140808105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,0,1,float16,fp8,0,1.6562933921813965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,128,1,fp8,fp8,0,0.3489226500193278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,0,1,float16,float16,0,1.6597065925598145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,128,1,float16,float16,0,0.3761013348897298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,0,1,fp8,fp8,0,1.549909273783366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,0,1,float16,fp8,0,1.664170742034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,128,1,float16,fp8,0,0.382426659266154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,128,1,fp8,fp8,0,0.3614186843236287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,0,1,float16,float16,0,1.6727466583251953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,128,1,float16,float16,0,0.2283786733945211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,128,1,float16,fp8,0,0.23350934187571207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,0,1,fp8,fp8,0,1.5622986157735188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,0,1,float16,fp8,0,1.678602695465088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,0,1,float16,float16,0,0.9222026666005453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,128,1,fp8,fp8,0,0.2241333325703939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,128,1,float16,float16,0,0.20195200045903525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,0,1,float16,fp8,0,0.9217973550160726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,0,1,fp8,fp8,0,0.8613119920094808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,128,1,float16,fp8,0,0.2039626638094584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,0,1,float16,float16,0,0.8869919776916504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,128,1,fp8,fp8,0,0.1972800095876058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,128,1,float16,float16,0,0.2056586742401123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,0,1,float16,fp8,0,0.8897653420766195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,0,1,fp8,fp8,0,0.8329226970672607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,128,1,float16,fp8,0,0.20595733324686685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,0,1,float16,float16,0,0.8887306849161783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,128,1,fp8,fp8,0,0.199237326780955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,128,1,float16,float16,0,0.20997333526611328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,0,1,fp8,fp8,0,0.8343573411305746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,0,1,float16,fp8,0,0.890175978342692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,128,1,float16,fp8,0,0.2119040091832479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,0,1,float16,float16,0,0.89683731396993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,128,1,fp8,fp8,0,0.20220800240834555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,128,1,float16,float16,0,0.21610132853190103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,0,1,float16,fp8,0,0.8950186570485433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,0,1,fp8,fp8,0,0.8389493624369303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,128,1,float16,fp8,0,0.22001065810521445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,0,1,float16,float16,0,0.9032800197601318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,128,1,fp8,fp8,0,0.21010667085647583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,128,1,float16,float16,0,0.16484799981117249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,0,1,float16,float16,0,0.5485226710637411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,0,1,float16,fp8,0,0.9069493611653646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,128,1,float16,fp8,0,0.16530666748682657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,0,1,fp8,fp8,0,0.8463040192921957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,128,1,fp8,fp8,0,0.15690666437149048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,128,1,float16,float16,0,0.16265599926312765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,0,1,fp8,fp8,0,0.5107253392537435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,0,1,float16,fp8,0,0.5474506616592407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,128,1,float16,fp8,0,0.1607360045115153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,128,1,fp8,fp8,0,0.1543786625067393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,0,1,float16,float16,0,0.5439253250757853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,0,1,float16,fp8,0,0.5450239976247152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,128,1,float16,float16,0,0.1607360045115153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,0,1,fp8,fp8,0,0.5087733268737793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,0,1,float16,float16,0,0.5440800189971924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,128,1,float16,fp8,0,0.16261333227157593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,128,1,fp8,fp8,0,0.15373333295186362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,128,1,float16,float16,0,0.1612266699473063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,0,1,float16,fp8,0,0.5448906819025675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,0,1,fp8,fp8,0,0.5081439812978109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,0,1,float16,float16,0,0.5452746550242106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,128,1,float16,fp8,0,0.16132799784342447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,128,1,fp8,fp8,0,0.15451199809710184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,128,1,float16,float16,0,0.16273599863052368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,0,1,float16,fp8,0,0.5448053280512491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,0,1,fp8,fp8,0,0.507856011390686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,0,1,float16,float16,0,0.5482079982757568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,128,1,float16,fp8,0,0.16379732886950174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,128,1,fp8,fp8,0,0.15486933787663779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,0,1,float16,fp8,0,0.5491893291473389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,0,1,fp8,fp8,0,0.5101173321406046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,128,1,float16,float16,0,0.5536799828211466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,128,1,float16,fp8,0,0.5593706766764323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,128,1,fp8,fp8,0,0.5184266567230225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,128,1,float16,float16,0,0.5587733189264933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,0,1,float16,float16,0,2.3454559644063315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,0,1,float16,fp8,0,2.2963199615478516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,128,1,float16,fp8,0,0.5662186543146769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,0,1,fp8,fp8,0,2.1338346799214682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,128,1,fp8,fp8,0,0.526528000831604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,0,1,float16,float16,0,2.298895994822184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,128,1,float16,float16,0,0.5693120161692301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,0,1,float16,fp8,0,2.304965337117513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,0,1,fp8,fp8,0,2.142314592997233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,128,1,float16,fp8,0,0.5839733282725016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,128,1,fp8,fp8,0,0.5393333435058594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,0,1,float16,float16,0,2.3113120396931968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,128,1,float16,float16,0,0.5877279837926229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,0,1,float16,fp8,0,2.3308960596720376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,0,1,fp8,fp8,0,2.1540427207946777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,128,1,float16,fp8,0,0.5969119866689047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,128,1,fp8,fp8,0,0.5600800116856893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,128,1,float16,float16,0,0.33423467477162677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,0,1,float16,float16,0,2.3327627182006836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,128,1,float16,fp8,0,0.34254932403564453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,128,1,fp8,fp8,0,0.32553066809972125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,0,1,float16,float16,0,1.2420960267384846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,0,1,fp8,fp8,0,2.1782612800598145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,0,1,float16,fp8,0,2.338325341542562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,128,1,float16,float16,0,0.2948960065841675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,128,1,float16,fp8,0,0.2976106603940328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,0,1,fp8,fp8,0,1.1646986802419026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,0,1,float16,fp8,0,1.2497386932373047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,0,1,float16,float16,0,1.1975946426391602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,128,1,fp8,fp8,0,0.2815786600112915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,128,1,float16,float16,0,0.2994719942410787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,0,1,fp8,fp8,0,1.1225120226542156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,0,1,float16,fp8,0,1.2015466690063477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,128,1,float16,fp8,0,0.30239466826121014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,0,1,float16,float16,0,1.2014293670654297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,128,1,fp8,fp8,0,0.2877440055211385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,128,1,float16,float16,0,0.3057653307914734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,0,1,fp8,fp8,0,1.1249866485595703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,0,1,float16,fp8,0,1.2037546634674072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,128,1,float16,fp8,0,0.3091839949289958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,128,1,fp8,fp8,0,0.29185599088668823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,0,1,float16,float16,0,1.2106613318125408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,128,1,float16,float16,0,0.3134506742159526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,0,1,float16,fp8,0,1.2117813428243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,0,1,fp8,fp8,0,1.1311519940694172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,128,1,float16,fp8,0,0.31862932443618774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,0,1,float16,float16,0,1.2221813201904297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,128,1,fp8,fp8,0,0.30184000730514526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,128,1,float16,float16,0,0.19203199942906699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,128,1,float16,fp8,0,0.19774933656056723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,0,1,float16,fp8,0,1.226101318995158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,0,1,float16,float16,0,0.6781760056813558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,0,1,fp8,fp8,0,1.1415893236796062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,128,1,fp8,fp8,0,0.1895893414815267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,128,1,float16,float16,0,0.17087467511494955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,0,1,fp8,fp8,0,0.6396533250808716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,0,1,float16,fp8,0,0.6849919954935709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,128,1,float16,fp8,0,0.1706719994544983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,0,1,float16,float16,0,0.6560480197270712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,128,1,fp8,fp8,0,0.16484799981117249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,128,1,float16,float16,0,0.1721173326174418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,0,1,float16,fp8,0,0.6546186606089274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,0,1,fp8,fp8,0,0.6124853293100992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,128,1,float16,fp8,0,0.1731626590092977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,0,1,float16,float16,0,0.6549919843673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,128,1,fp8,fp8,0,0.16668800512949625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,128,1,float16,float16,0,0.17510400215784708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,0,1,float16,fp8,0,0.6566133499145508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,0,1,fp8,fp8,0,0.6152799924214681
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,128,1,float16,fp8,0,0.17721599340438843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,0,1,float16,float16,0,0.6608266830444336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,128,1,fp8,fp8,0,0.17169066270192465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,128,1,float16,float16,0,0.18110400438308716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,0,1,float16,fp8,0,0.6623466809590658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,0,1,fp8,fp8,0,0.6196586688359579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,128,1,float16,fp8,0,0.18475733200709024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,0,1,float16,float16,0,0.6668907006581625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,128,1,fp8,fp8,0,0.1776426633199056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,128,1,float16,float16,0,0.139984001715978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,0,1,float16,fp8,0,0.6695679823557535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,128,1,float16,fp8,0,0.13871999581654867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,0,1,fp8,fp8,0,0.6261119842529297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,0,1,float16,float16,0,0.41781866550445557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,128,1,fp8,fp8,0,0.1346560021241506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,128,1,float16,float16,0,0.13806933164596558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,0,1,fp8,fp8,0,0.38654398918151855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,0,1,float16,fp8,0,0.41492799917856854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,128,1,float16,fp8,0,0.1381226678689321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,0,1,float16,float16,0,0.41178667545318604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,128,1,fp8,fp8,0,0.1320746640364329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,0,1,float16,fp8,0,0.41145066420237225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,128,1,float16,float16,0,0.13805866241455078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,0,1,fp8,fp8,0,0.38602133591969806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,0,1,float16,float16,0,0.415338675181071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,128,1,float16,fp8,0,0.1381600002447764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,128,1,fp8,fp8,0,0.1318880021572113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,128,1,float16,float16,0,0.13821867108345032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,0,1,float16,fp8,0,0.41515199343363446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,0,1,fp8,fp8,0,0.3843253453572591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,0,1,float16,float16,0,0.4121439854303996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,128,1,float16,fp8,0,0.1379146675268809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,128,1,fp8,fp8,0,0.13193600376447043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,128,1,float16,float16,0,0.13795199990272522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,0,1,float16,fp8,0,0.41309865315755206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,0,1,fp8,fp8,0,0.38570666313171387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,128,1,float16,fp8,0,0.13802133003870645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,128,1,fp8,fp8,0,0.1335093379020691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,0,1,float16,float16,0,0.41279999415079754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,0,1,float16,fp8,0,0.4147679805755615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,0,1,fp8,fp8,0,0.38627731800079346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,128,1,float16,float16,0,0.8582399686177572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,128,1,float16,fp8,0,0.8661653200785319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,128,1,fp8,fp8,0,0.803061326344808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,0,1,float16,float16,0,3.1014347076416016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,128,1,float16,float16,0,0.875711997350057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,0,1,float16,fp8,0,3.0741974512736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,0,1,fp8,fp8,0,2.8152427673339844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,128,1,float16,fp8,0,0.8818879922231039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,128,1,fp8,fp8,0,0.8227519989013672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,128,1,float16,float16,0,0.8917493025461832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,0,1,float16,float16,0,3.1684799194335938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,0,1,fp8,fp8,0,2.833984057108561
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,128,1,float16,fp8,0,0.8995359738667806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,0,1,float16,fp8,0,3.0511468251546225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,128,1,fp8,fp8,0,0.8425760269165039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,0,1,float16,float16,0,3.0626932779947915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,128,1,float16,float16,0,0.9220693111419678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,0,1,fp8,fp8,0,2.856106758117676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,0,1,float16,fp8,0,3.201514561971029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,128,1,float16,fp8,0,0.9320053259531657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,128,1,fp8,fp8,0,0.8812266985575358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,0,1,float16,float16,0,3.0987412134806314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,128,1,float16,float16,0,0.5045386552810669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,128,1,float16,fp8,0,0.5154613256454468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,0,1,fp8,fp8,0,2.892047882080078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,0,1,float16,float16,0,1.6223093668619792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,128,1,fp8,fp8,0,0.48846399784088135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,0,1,float16,fp8,0,3.2623252868652344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,128,1,float16,float16,0,0.44391465187072754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,128,1,float16,fp8,0,0.4477440118789673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,0,1,fp8,fp8,0,1.519584019978841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,0,1,float16,fp8,0,1.6332532564798992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,0,1,float16,float16,0,1.5574506123860676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,128,1,fp8,fp8,0,0.41765332221984863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,128,1,float16,float16,0,0.45083733399709064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,0,1,fp8,fp8,0,1.4478880564371746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,128,1,float16,fp8,0,0.4533546765645345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,0,1,float16,fp8,0,1.5591200192769368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,0,1,float16,float16,0,1.5613493919372559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,128,1,fp8,fp8,0,0.4242560068766276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,128,1,float16,float16,0,0.45634667078653973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,0,1,fp8,fp8,0,1.4554506937662761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,0,1,float16,fp8,0,1.5649174054463704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,128,1,float16,fp8,0,0.4617973168690999
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,0,1,float16,float16,0,1.5700160662333171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,128,1,fp8,fp8,0,0.4331306616465251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,128,1,float16,float16,0,0.47225598494211835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,0,1,float16,fp8,0,1.6193866729736328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,0,1,fp8,fp8,0,1.4642613728841145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,128,1,float16,fp8,0,0.4785066843032837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,0,1,float16,float16,0,1.585978666941325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,128,1,fp8,fp8,0,0.44944532712300617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,128,1,float16,float16,0,0.27137066920598346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,128,1,float16,fp8,0,0.27749866247177124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,0,1,float16,float16,0,0.8580533663431803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,0,1,float16,fp8,0,1.5942187309265137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,0,1,fp8,fp8,0,1.4800426165262859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,128,1,fp8,fp8,0,0.2650559941927592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,128,1,float16,float16,0,0.23926933606465658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,0,1,float16,fp8,0,0.8653279940287272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,0,1,fp8,fp8,0,0.8055893580118815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,128,1,float16,fp8,0,0.24200000365575156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,0,1,float16,float16,0,0.8190240065256754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,128,1,fp8,fp8,0,0.2297333280245463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,128,1,float16,float16,0,0.24259734153747559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,0,1,fp8,fp8,0,0.7692533334096273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,0,1,float16,fp8,0,0.823088010152181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,128,1,float16,fp8,0,0.2444480061531067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,0,1,float16,float16,0,0.8226986726125082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,128,1,fp8,fp8,0,0.23439466953277588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,128,1,float16,float16,0,0.24845333894093832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,0,1,float16,fp8,0,0.827338695526123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,0,1,fp8,fp8,0,0.7707520325978597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,128,1,float16,fp8,0,0.25066665808359784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,0,1,float16,float16,0,0.8290239969889323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,128,1,fp8,fp8,0,0.23824532826741537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,128,1,float16,float16,0,0.2571733395258586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,0,1,float16,fp8,0,0.8325760364532471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,0,1,fp8,fp8,0,0.7775786717732748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,128,1,float16,fp8,0,0.2598080039024353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,0,1,float16,float16,0,0.8391306400299072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,128,1,fp8,fp8,0,0.2473120093345642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,128,1,float16,float16,0,0.15633066495259604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,128,1,float16,fp8,0,0.15879467129707336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,0,1,float16,fp8,0,0.844480037689209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,0,1,float16,float16,0,0.47730668385823566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,0,1,fp8,fp8,0,0.7859360376993815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,128,1,fp8,fp8,0,0.15429866313934326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,128,1,float16,float16,0,0.13707733154296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,0,1,float16,fp8,0,0.48205868403116864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,0,1,fp8,fp8,0,0.44923198223114014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,128,1,float16,fp8,0,0.1365386644999186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,0,1,float16,float16,0,0.45662399133046466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,128,1,fp8,fp8,0,0.1291093329588572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,0,1,float16,fp8,0,0.4551733334859212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,128,1,float16,float16,0,0.13797866304715475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,0,1,fp8,fp8,0,0.42466668287913006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,128,1,float16,fp8,0,0.1381280024846395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,128,1,fp8,fp8,0,0.1323040028413137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,0,1,float16,float16,0,0.45530132452646893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,128,1,float16,float16,0,0.1397760013739268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,0,1,float16,fp8,0,0.4609493414560954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,0,1,fp8,fp8,0,0.42725332578023273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,128,1,float16,fp8,0,0.1421280006567637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,128,1,fp8,fp8,0,0.13821333646774292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,0,1,float16,float16,0,0.45843732357025146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,128,1,float16,float16,0,0.14589333534240723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,0,1,float16,fp8,0,0.46276267369588214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,0,1,fp8,fp8,0,0.4341493447621663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,128,1,float16,fp8,0,0.148117333650589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,128,1,fp8,fp8,0,0.14453333616256714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,0,1,float16,float16,0,0.4662026564280192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,128,1,float16,float16,0,0.11358933647473653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,0,1,float16,fp8,0,0.46933333079020184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,0,1,fp8,fp8,0,0.4389760096867879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,128,1,float16,fp8,0,0.11317867040634155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,0,1,float16,float16,0,0.3008906642595927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,128,1,fp8,fp8,0,0.10943466424942017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,128,1,float16,float16,0,0.11162666479746501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,0,1,float16,fp8,0,0.3027519981066386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,0,1,fp8,fp8,0,0.28138667345046997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,128,1,float16,fp8,0,0.11343466242154439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,128,1,fp8,fp8,0,0.10757866501808167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,0,1,float16,float16,0,0.29997867345809937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,128,1,float16,float16,0,0.11335999766985576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,0,1,float16,fp8,0,0.2999359965324402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,0,1,fp8,fp8,0,0.2797759970029195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,128,1,float16,fp8,0,0.11335999766985576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,128,1,fp8,fp8,0,0.10937600334485371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,0,1,float16,float16,0,0.3007146716117859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,128,1,float16,float16,0,0.11162133018175761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,0,1,fp8,fp8,0,0.2797120014826457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,0,1,float16,fp8,0,0.2993386586507161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,128,1,float16,fp8,0,0.11154133081436157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,128,1,fp8,fp8,0,0.10735999544461568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,0,1,float16,float16,0,0.2998186747233073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,128,1,float16,float16,0,0.11322666207949321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,0,1,fp8,fp8,0,0.2776640057563782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,0,1,float16,fp8,0,0.3006613254547119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,128,1,float16,fp8,0,0.11160000165303548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,128,1,fp8,fp8,0,0.10918399691581726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,0,1,float16,float16,0,0.30114134152730304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,0,1,float16,fp8,0,0.2984480063120524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,0,1,fp8,fp8,0,0.2792213360468547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,128,1,float16,float16,0,0.645466685295105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,128,1,fp8,fp8,0,0.6021813154220581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,128,1,float16,fp8,0,0.6507306496302286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,0,1,float16,float16,0,1.8494240442911785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,128,1,float16,float16,0,0.653711994489034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,0,1,float16,fp8,0,1.8521013259887695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,0,1,fp8,fp8,0,1.7136054039001465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,128,1,float16,fp8,0,0.6589226722717285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,128,1,fp8,fp8,0,0.61353600025177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,0,1,float16,float16,0,1.8568746248881023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,128,1,float16,float16,0,0.6658186515172323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,0,1,fp8,fp8,0,1.7268853187561035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,0,1,float16,fp8,0,1.8622080485026042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,128,1,float16,fp8,0,0.6737706661224365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,128,1,fp8,fp8,0,0.6297866503397623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,0,1,float16,float16,0,1.872693379720052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,128,1,float16,float16,0,0.688373327255249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,0,1,float16,fp8,0,1.8770453135172527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,0,1,fp8,fp8,0,1.7422240575154622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,128,1,float16,fp8,0,0.6963520050048828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,128,1,fp8,fp8,0,0.6573973496754965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,0,1,float16,float16,0,1.8958133061726887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,128,1,float16,float16,0,0.3840106725692749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,128,1,float16,fp8,0,0.39159464836120605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,0,1,float16,float16,0,1.0084160168965657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,0,1,float16,fp8,0,1.9049919446309407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,0,1,fp8,fp8,0,1.7694506645202637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,128,1,fp8,fp8,0,0.3717386722564697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,128,1,float16,float16,0,0.3371146519978841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,0,1,float16,fp8,0,1.0174132982889812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,0,1,fp8,fp8,0,0.9450506369272867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,128,1,float16,fp8,0,0.3406453529993693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,0,1,float16,float16,0,0.9581546783447266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,128,1,fp8,fp8,0,0.320357342561086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,128,1,float16,float16,0,0.34112000465393066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,0,1,float16,fp8,0,0.9608853658040365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,0,1,fp8,fp8,0,0.8925920327504476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,128,1,float16,fp8,0,0.34481600920359295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,0,1,float16,float16,0,0.96234130859375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,128,1,fp8,fp8,0,0.32442132631937665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,128,1,float16,float16,0,0.3500746488571167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,0,1,float16,fp8,0,0.9639253616333008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,0,1,fp8,fp8,0,0.8985546429951986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,128,1,float16,fp8,0,0.35392534732818604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,0,1,float16,float16,0,0.969210704167684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,128,1,fp8,fp8,0,0.332261323928833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,128,1,float16,float16,0,0.3585919936498006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,0,1,float16,fp8,0,0.9739200274149576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,0,1,fp8,fp8,0,0.9052960077921549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,128,1,float16,fp8,0,0.3657066822052002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,0,1,float16,float16,0,0.9805333614349365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,128,1,fp8,fp8,0,0.3425653378168742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,128,1,float16,float16,0,0.21033066511154175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,0,1,float16,fp8,0,0.9877066612243652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,0,1,fp8,fp8,0,0.9162133534749349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,0,1,float16,float16,0,0.5420159896214803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,128,1,float16,fp8,0,0.21381332476933798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,128,1,fp8,fp8,0,0.20595733324686685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,0,1,float16,fp8,0,0.547765334447225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,128,1,float16,float16,0,0.1832053263982137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,0,1,fp8,fp8,0,0.5089279810587565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,128,1,float16,fp8,0,0.18443200985590616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,128,1,fp8,fp8,0,0.17809067169825235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,0,1,float16,float16,0,0.5103520154953003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,0,1,fp8,fp8,0,0.4802986780802409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,128,1,float16,float16,0,0.185205340385437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,0,1,float16,fp8,0,0.514245351155599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,128,1,float16,fp8,0,0.18674665689468384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,128,1,fp8,fp8,0,0.1799466609954834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,0,1,float16,float16,0,0.5141013463338217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,0,1,fp8,fp8,0,0.4837813377380371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,128,1,float16,float16,0,0.1908586621284485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,0,1,float16,fp8,0,0.5145013332366943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,128,1,float16,fp8,0,0.19339734315872192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,128,1,fp8,fp8,0,0.18513067563374838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,0,1,float16,float16,0,0.518671989440918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,0,1,fp8,fp8,0,0.48820265134175617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,128,1,float16,float16,0,0.1965120037396749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,0,1,float16,fp8,0,0.522490660349528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,128,1,fp8,fp8,0,0.19181867440541586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,128,1,float16,fp8,0,0.20172800620396933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,0,1,float16,float16,0,0.5277066628138224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,128,1,float16,float16,0,0.12273599704106648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,0,1,fp8,fp8,0,0.4957546790440877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,0,1,float16,fp8,0,0.531823992729187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,128,1,float16,fp8,0,0.12426666418711345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,128,1,fp8,fp8,0,0.12169067064921062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,0,1,float16,float16,0,0.3084213336308797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,128,1,float16,float16,0,0.10921600461006165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,0,1,fp8,fp8,0,0.29135467608769733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,0,1,float16,fp8,0,0.31138134002685547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,0,1,float16,float16,0,0.2934826612472534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,128,1,float16,fp8,0,0.10921066999435425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,128,1,fp8,fp8,0,0.10243733723958333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,128,1,float16,float16,0,0.10930132865905762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,0,1,fp8,fp8,0,0.27158933877944946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,0,1,float16,fp8,0,0.29393599430720013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,0,1,float16,float16,0,0.2946880062421163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,128,1,float16,fp8,0,0.11146666606267293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,128,1,fp8,fp8,0,0.10319999853769939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,0,1,float16,fp8,0,0.29409066836039227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,128,1,float16,float16,0,0.11141332983970642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,0,1,fp8,fp8,0,0.2737013300259908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,0,1,float16,float16,0,0.2953919967015584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,128,1,fp8,fp8,0,0.10515200098355611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,128,1,float16,fp8,0,0.11181867122650146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,0,1,float16,fp8,0,0.2969013253847758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,0,1,fp8,fp8,0,0.27558932701746625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,128,1,float16,float16,0,0.11358400185902913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,0,1,float16,float16,0,0.29808000723520917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,128,1,float16,fp8,0,0.11636267105738322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,128,1,fp8,fp8,0,0.11382933457692464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,0,1,float16,fp8,0,0.3028800090154012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,128,1,float16,float16,0,0.08880533774693807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,0,1,fp8,fp8,0,0.28165332476298016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,0,1,float16,float16,0,0.2002240022023519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,128,1,float16,fp8,0,0.088837335507075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,128,1,fp8,fp8,0,0.08526399731636047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,0,1,float16,fp8,0,0.20097599426905313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,0,1,fp8,fp8,0,0.18731200695037842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,128,1,float16,float16,0,0.08871466914812724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,0,1,float16,float16,0,0.20177600781122842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,128,1,float16,fp8,0,0.08888000249862671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,128,1,fp8,fp8,0,0.08461866776148479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,0,1,float16,fp8,0,0.20029866695404053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,0,1,fp8,fp8,0,0.19051732619603476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,128,1,float16,float16,0,0.08874133229255676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,0,1,float16,float16,0,0.2007733384768168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,128,1,float16,fp8,0,0.0869653324286143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,128,1,fp8,fp8,0,0.08462400237719218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,0,1,float16,fp8,0,0.20076799392700195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,128,1,float16,float16,0,0.08683733145395915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,0,1,fp8,fp8,0,0.18837867180506387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,0,1,float16,float16,0,0.20042133331298828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,128,1,float16,fp8,0,0.08690133690834045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,128,1,fp8,fp8,0,0.08488532900810242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,0,1,float16,fp8,0,0.19966399669647217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,128,1,float16,float16,0,0.08716266353925069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,0,1,fp8,fp8,0,0.1876586675643921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,0,1,float16,float16,0,0.1994453271230062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,128,1,float16,fp8,0,0.08717333277066548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,128,1,fp8,fp8,0,0.08468799789746602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,0,1,float16,fp8,0,0.20195732514063516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,0,1,fp8,fp8,0,0.18793066342671713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,128,1,float16,float16,0,0.8526026407877604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,128,1,float16,fp8,0,0.8583359718322754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,128,1,fp8,fp8,0,0.793941338857015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,0,1,float16,float16,0,1.8585707346598308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,128,1,float16,float16,0,0.8696853319803873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,0,1,float16,fp8,0,1.8637706438700359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,0,1,fp8,fp8,0,1.7241439819335938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,128,1,float16,fp8,0,0.8744693597157797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,128,1,fp8,fp8,0,0.8133227030436198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,0,1,float16,float16,0,1.8769920667012532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,128,1,float16,float16,0,0.8881119887034098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,0,1,fp8,fp8,0,1.7430240313212078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,0,1,float16,fp8,0,1.8796693483988445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,128,1,float16,fp8,0,0.892906665802002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,128,1,fp8,fp8,0,0.8323893547058105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,0,1,float16,float16,0,1.8951093355814617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,128,1,float16,float16,0,0.9182079633076986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,0,1,fp8,fp8,0,1.761573314666748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,0,1,float16,fp8,0,1.9016319910685222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,128,1,float16,fp8,0,0.9259626865386963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,128,1,fp8,fp8,0,0.870405356089274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,0,1,float16,float16,0,1.9276639620463054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,128,1,float16,float16,0,0.4962186813354492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,0,1,float16,fp8,0,1.9367787043253581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,128,1,float16,fp8,0,0.503333330154419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,0,1,fp8,fp8,0,1.7994453112284343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,0,1,float16,float16,0,1.01582932472229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,128,1,fp8,fp8,0,0.478490670522054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,128,1,float16,float16,0,0.43347732226053876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,0,1,float16,fp8,0,1.0238666534423828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,0,1,fp8,fp8,0,0.9540533224741617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,0,1,float16,float16,0,0.9518240292867025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,128,1,float16,fp8,0,0.4370559851328532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,128,1,fp8,fp8,0,0.4071040153503418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,128,1,float16,float16,0,0.4395466645558675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,0,1,float16,fp8,0,0.9543840090433756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,0,1,fp8,fp8,0,0.8841760158538818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,128,1,float16,fp8,0,0.4434826771418254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,0,1,float16,float16,0,0.9545599619547526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,128,1,fp8,fp8,0,0.4132320086161296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,128,1,float16,float16,0,0.4472959836324056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,0,1,float16,fp8,0,0.9612267017364502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,0,1,fp8,fp8,0,0.890127976735433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,0,1,float16,float16,0,0.9655306339263916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,128,1,float16,fp8,0,0.4525173505147298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,128,1,fp8,fp8,0,0.4216639995574951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,128,1,float16,float16,0,0.4617600043614705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,0,1,float16,fp8,0,0.9693546295166016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,0,1,fp8,fp8,0,0.9005813598632812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,0,1,float16,float16,0,0.9833013216654459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,128,1,float16,fp8,0,0.46797335147857666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,128,1,fp8,fp8,0,0.4389439821243286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,128,1,float16,float16,0,0.2614826758702596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,0,1,float16,fp8,0,0.9887146949768066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,0,1,float16,float16,0,0.5330559810002645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,0,1,fp8,fp8,0,0.9168746471405029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,128,1,float16,fp8,0,0.26712000370025635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,128,1,fp8,fp8,0,0.2548533280690511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,128,1,float16,float16,0,0.22593067089716592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,0,1,fp8,fp8,0,0.5036213397979736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,0,1,float16,fp8,0,0.5390986601511637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,128,1,float16,fp8,0,0.2283253272374471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,0,1,float16,float16,0,0.4983733495076497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,128,1,fp8,fp8,0,0.21790399154027304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,128,1,float16,float16,0,0.2302293380101522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,0,1,float16,fp8,0,0.499290664990743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,0,1,fp8,fp8,0,0.4678773482640584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,128,1,float16,fp8,0,0.2321066657702128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,0,1,float16,float16,0,0.502122680346171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,128,1,fp8,fp8,0,0.2220159967740377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,0,1,float16,fp8,0,0.5030826727549235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,128,1,float16,float16,0,0.23703465859095255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,0,1,fp8,fp8,0,0.4713386694590251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,0,1,float16,float16,0,0.5066933234532675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,128,1,float16,fp8,0,0.2387146751085917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,128,1,fp8,fp8,0,0.22826667626698813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,128,1,float16,float16,0,0.24441067377726236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,0,1,float16,fp8,0,0.5112906694412231
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,0,1,fp8,fp8,0,0.4767093261082967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,0,1,float16,float16,0,0.5178186496098837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,128,1,float16,fp8,0,0.24860266844431558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,128,1,fp8,fp8,0,0.23592533667882284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,0,1,float16,fp8,0,0.519541343053182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,128,1,float16,float16,0,0.14342400431632996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,0,1,float16,float16,0,0.29498666524887085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,0,1,fp8,fp8,0,0.4849066734313965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,128,1,float16,fp8,0,0.14653866489728293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,128,1,fp8,fp8,0,0.14267200231552124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,0,1,fp8,fp8,0,0.2808106740315755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,0,1,float16,fp8,0,0.29763199885686237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,128,1,float16,float16,0,0.1216319998105367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,0,1,float16,float16,0,0.2717919945716858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,128,1,float16,fp8,0,0.12388799587885539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,128,1,fp8,fp8,0,0.117658664782842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,0,1,float16,fp8,0,0.2739253242810567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,0,1,fp8,fp8,0,0.25302932659784955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,128,1,float16,float16,0,0.12358400225639343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,0,1,float16,float16,0,0.2741120060284932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,128,1,float16,fp8,0,0.12518399953842163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,128,1,fp8,fp8,0,0.11866666873296101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,0,1,float16,fp8,0,0.2757813334465027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,128,1,float16,float16,0,0.12569600343704224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,0,1,fp8,fp8,0,0.2571093241373698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,0,1,float16,float16,0,0.27562133471171063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,128,1,float16,fp8,0,0.12811733285586038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,128,1,fp8,fp8,0,0.12457600235939026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,0,1,float16,fp8,0,0.2815573414166768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,128,1,float16,float16,0,0.13201066851615906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,0,1,fp8,fp8,0,0.2632053295771281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,128,1,float16,fp8,0,0.13429333766301474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,0,1,float16,float16,0,0.2842506567637126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,128,1,fp8,fp8,0,0.13169067104657492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,128,1,float16,float16,0,0.08493333061536153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,0,1,float16,fp8,0,0.28618133068084717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,0,1,fp8,fp8,0,0.2693386673927307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,0,1,float16,float16,0,0.17277334133783975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,128,1,float16,fp8,0,0.08681066830952962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,128,1,fp8,fp8,0,0.08505066235860188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,0,1,float16,fp8,0,0.1753013332684835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,128,1,float16,float16,0,0.07709866762161255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,0,1,fp8,fp8,0,0.1660373310248057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,128,1,float16,fp8,0,0.07866133252779643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,128,1,fp8,fp8,0,0.07473066449165344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,0,1,float16,float16,0,0.1667893330256144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,0,1,float16,fp8,0,0.16774932543436685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,128,1,float16,float16,0,0.07784000039100647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,0,1,fp8,fp8,0,0.15241600076357523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,128,1,float16,fp8,0,0.07854933540026347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,128,1,fp8,fp8,0,0.07464000085989635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,0,1,float16,float16,0,0.16639999548594156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,128,1,float16,float16,0,0.07853866616884868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,0,1,float16,fp8,0,0.1669173240661621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,0,1,fp8,fp8,0,0.15479466319084167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,128,1,float16,fp8,0,0.07875733574231465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,128,1,fp8,fp8,0,0.07646933197975159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,0,1,float16,float16,0,0.16536532839139303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,128,1,float16,float16,0,0.07891199986139934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,0,1,fp8,fp8,0,0.15461333592732748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,0,1,float16,fp8,0,0.16765334208806357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,128,1,float16,fp8,0,0.08080000181992848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,128,1,fp8,fp8,0,0.07892799874146779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,0,1,float16,float16,0,0.16845333576202393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,128,1,float16,float16,0,0.06412800153096516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,0,1,fp8,fp8,0,0.15677332878112793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,0,1,float16,fp8,0,0.16749866803487143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,0,1,float16,float16,0,0.1209333340326945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,128,1,float16,fp8,0,0.06250666578610738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,128,1,fp8,fp8,0,0.06020266811052958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,128,1,float16,float16,0,0.06412266691525777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,0,1,float16,fp8,0,0.12148800492286682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,0,1,fp8,fp8,0,0.11558399597803752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,128,1,float16,fp8,0,0.062181333700815834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,0,1,float16,float16,0,0.12150933345158894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,128,1,fp8,fp8,0,0.06225066880385081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,128,1,float16,float16,0,0.06411733229955037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,0,1,float16,fp8,0,0.12262933452924092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,0,1,fp8,fp8,0,0.11355732878049214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,128,1,float16,fp8,0,0.06425599753856659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,128,1,fp8,fp8,0,0.06020799775918325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,0,1,float16,float16,0,0.12194666266441345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,128,1,float16,float16,0,0.06258666515350342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,0,1,float16,fp8,0,0.12248532970746358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,0,1,fp8,fp8,0,0.1141973336537679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,128,1,float16,fp8,0,0.06404266754786174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,0,1,float16,float16,0,0.12124266227086385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,128,1,fp8,fp8,0,0.062133332093556724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,128,1,float16,float16,0,0.06414400041103363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,0,1,float16,fp8,0,0.12176000078519185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,0,1,fp8,fp8,0,0.11363200346628825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,128,1,float16,fp8,0,0.06413333117961884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,128,1,fp8,fp8,0,0.06028266747792562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,0,1,float16,float16,0,0.12175466616948445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,0,1,float16,fp8,0,0.11961600184440613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,0,1,fp8,fp8,0,0.11515200138092041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,128,1,float16,float16,0,0.6429920196533203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,128,1,fp8,fp8,0,0.5977919896443685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,128,1,float16,fp8,0,0.6475573380788168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,0,1,float16,float16,0,1.1829813321431477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,128,1,float16,float16,0,0.6540160179138184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,0,1,float16,fp8,0,1.1894773642222087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,0,1,fp8,fp8,0,1.0959146817525227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,0,1,float16,float16,0,1.193120002746582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,128,1,fp8,fp8,0,0.6092906792958578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,128,1,float16,fp8,0,0.6578026612599691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,0,1,float16,fp8,0,1.2003626823425293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,128,1,float16,float16,0,0.6685919761657715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,0,1,fp8,fp8,0,1.107957363128662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,0,1,float16,float16,0,1.2091519832611084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,128,1,float16,fp8,0,0.6710293292999268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,128,1,fp8,fp8,0,0.626309315363566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,128,1,float16,float16,0,0.6909120082855225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,0,1,float16,fp8,0,1.2107946872711182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,0,1,fp8,fp8,0,1.1240639686584473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,0,1,float16,float16,0,1.2317279974619548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,128,1,float16,fp8,0,0.6953012943267822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,128,1,fp8,fp8,0,0.6528053283691406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,128,1,float16,float16,0,0.37915201981862384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,0,1,float16,fp8,0,1.2365333239237468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,0,1,float16,float16,0,0.6587680180867513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,0,1,fp8,fp8,0,1.1509653727213542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,128,1,float16,fp8,0,0.38528533776601154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,128,1,fp8,fp8,0,0.36323734124501544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,128,1,float16,float16,0,0.33053867022196454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,0,1,float16,fp8,0,0.6655040184656779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,0,1,fp8,fp8,0,0.6216799815495809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,128,1,float16,fp8,0,0.3323520024617513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,0,1,float16,float16,0,0.6096373399098715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,128,1,fp8,fp8,0,0.31176533301671344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,128,1,float16,float16,0,0.3354026476542155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,0,1,fp8,fp8,0,0.5688213507334391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,0,1,float16,fp8,0,0.6124746799468994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,128,1,float16,fp8,0,0.3372053305308024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,128,1,fp8,fp8,0,0.31730133295059204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,0,1,float16,float16,0,0.6137226819992065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,128,1,float16,float16,0,0.3423413435618083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,0,1,fp8,fp8,0,0.5747679869333903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,0,1,float16,fp8,0,0.6152960062026978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,128,1,float16,fp8,0,0.3465386629104614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,128,1,fp8,fp8,0,0.32337067524592084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,0,1,float16,float16,0,0.6213173468907675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,128,1,float16,float16,0,0.3534880081812541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,0,1,fp8,fp8,0,0.5806506474812826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,0,1,float16,fp8,0,0.6267786820729574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,128,1,float16,fp8,0,0.35865068435668945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,128,1,fp8,fp8,0,0.3349119822184245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,0,1,float16,float16,0,0.6340159972508749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,128,1,float16,float16,0,0.2020533283551534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,0,1,fp8,fp8,0,0.5926026503245035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,0,1,float16,fp8,0,0.638314684232076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,0,1,float16,float16,0,0.35180266698201496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,128,1,float16,fp8,0,0.2057173252105713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,128,1,fp8,fp8,0,0.1977120041847229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,128,1,float16,float16,0,0.17266666889190674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,0,1,float16,fp8,0,0.35650134086608887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,0,1,fp8,fp8,0,0.3325546582539876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,128,1,float16,fp8,0,0.17479467391967773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,128,1,fp8,fp8,0,0.1686506668726603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,0,1,float16,float16,0,0.31972267230351764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,128,1,float16,float16,0,0.17485866943995157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,0,1,float16,fp8,0,0.3235573371251424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,0,1,fp8,fp8,0,0.30567999680836994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,128,1,fp8,fp8,0,0.16886399189631143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,128,1,float16,fp8,0,0.17773866653442383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,0,1,float16,float16,0,0.32250134150187176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,0,1,float16,fp8,0,0.32474132378896076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,128,1,float16,float16,0,0.17945599555969238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,0,1,fp8,fp8,0,0.3079520066579183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,128,1,float16,fp8,0,0.1827626625696818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,0,1,float16,float16,0,0.329258660475413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,128,1,fp8,fp8,0,0.17605332533518472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,0,1,float16,fp8,0,0.33348266283671063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,128,1,float16,float16,0,0.18901866674423218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,0,1,fp8,fp8,0,0.312879999478658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,0,1,float16,float16,0,0.338368018468221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,128,1,float16,fp8,0,0.190938671429952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,128,1,fp8,fp8,0,0.1830186645189921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,0,1,float16,fp8,0,0.34169598420461017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,128,1,float16,float16,0,0.11143466830253601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,0,1,float16,float16,0,0.19538666804631552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,0,1,fp8,fp8,0,0.3183679978052775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,128,1,float16,fp8,0,0.1146506667137146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,128,1,fp8,fp8,0,0.11154666543006897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,0,1,float16,fp8,0,0.1977120041847229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,0,1,fp8,fp8,0,0.18823466698328653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,128,1,float16,float16,0,0.09814400474230449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,0,1,float16,float16,0,0.18186134099960327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,128,1,float16,fp8,0,0.09841066598892212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,128,1,fp8,fp8,0,0.09250666697820027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,0,1,float16,fp8,0,0.18134399255116782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,0,1,fp8,fp8,0,0.16904000441233316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,128,1,float16,float16,0,0.09714667002360027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,0,1,float16,float16,0,0.18119466304779053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,128,1,float16,fp8,0,0.0990613301595052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,128,1,fp8,fp8,0,0.09278399745623271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,0,1,float16,fp8,0,0.18211734294891357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,128,1,float16,float16,0,0.09928533434867859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,0,1,fp8,fp8,0,0.17069333791732788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,0,1,float16,float16,0,0.1848906675974528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,128,1,float16,fp8,0,0.10194133718808492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,128,1,fp8,fp8,0,0.09483200311660767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,0,1,float16,fp8,0,0.1837973395983378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,128,1,float16,float16,0,0.10294933120409648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,0,1,fp8,fp8,0,0.17310933272043863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,0,1,float16,float16,0,0.1880799929300944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,128,1,float16,fp8,0,0.10531733433405559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,128,1,fp8,fp8,0,0.10293866197268169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,0,1,float16,fp8,0,0.18965866168340048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,128,1,float16,float16,0,0.06433066725730896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,0,1,fp8,fp8,0,0.1811359922091166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,0,1,float16,float16,0,0.1158026655515035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,128,1,float16,fp8,0,0.06646933158238728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,128,1,fp8,fp8,0,0.06650133430957794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,0,1,float16,fp8,0,0.11924800276756287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,0,1,fp8,fp8,0,0.11373866597811381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,128,1,float16,float16,0,0.062122667829195656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,0,1,float16,float16,0,0.1120853324731191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,128,1,float16,fp8,0,0.062405332922935486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,128,1,fp8,fp8,0,0.06025599936644236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,0,1,float16,fp8,0,0.11365333199501038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,0,1,fp8,fp8,0,0.10544000069300334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,128,1,float16,float16,0,0.06217599908510844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,0,1,float16,float16,0,0.11366400122642517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,128,1,float16,fp8,0,0.06214933097362518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,128,1,fp8,fp8,0,0.06017066538333893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,0,1,float16,fp8,0,0.11370133360226949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,128,1,float16,float16,0,0.0622026671965917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,0,1,fp8,fp8,0,0.10553066929181416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,0,1,float16,float16,0,0.11410133043924968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,128,1,float16,fp8,0,0.062496001521746315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,128,1,fp8,fp8,0,0.060453335444132485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,0,1,float16,fp8,0,0.11460266510645549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,0,1,fp8,fp8,0,0.10655466715494792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,128,1,float16,float16,0,0.06260266900062561
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,0,1,float16,float16,0,0.11379200220108032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,128,1,float16,fp8,0,0.06454400221506755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,128,1,fp8,fp8,0,0.062208001812299095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,0,1,float16,fp8,0,0.11570133765538533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,128,1,float16,float16,0,0.054010664423306785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,0,1,fp8,fp8,0,0.1095306674639384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,0,1,float16,float16,0,0.08475200335184734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,128,1,float16,fp8,0,0.054144000013669334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,128,1,fp8,fp8,0,0.052149335543314614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,0,1,float16,fp8,0,0.0849226713180542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,128,1,float16,float16,0,0.05388266841570536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,0,1,fp8,fp8,0,0.08102933565775554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,0,1,float16,float16,0,0.08546132842699687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,128,1,float16,fp8,0,0.05410666763782501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,128,1,fp8,fp8,0,0.05176533261934916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,0,1,float16,fp8,0,0.08539733290672302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,128,1,float16,float16,0,0.05409066875775655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,0,1,fp8,fp8,0,0.08067200084527333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,0,1,float16,float16,0,0.08543999989827473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,128,1,float16,fp8,0,0.05390933156013489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,128,1,fp8,fp8,0,0.05193600058555603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,0,1,float16,fp8,0,0.08526399731636047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,0,1,fp8,fp8,0,0.08089600006739299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,128,1,float16,float16,0,0.05395199855168661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,0,1,float16,float16,0,0.08475200335184734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,128,1,float16,fp8,0,0.05409066875775655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,128,1,fp8,fp8,0,0.052111998200416565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,0,1,float16,fp8,0,0.08509332935015361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,128,1,float16,float16,0,0.05425600210825602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,0,1,fp8,fp8,0,0.08077333370844524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,0,1,float16,float16,0,0.084906667470932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,128,1,float16,fp8,0,0.054383998115857445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,128,1,fp8,fp8,0,0.05182399849096934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,0,1,float16,fp8,0,0.08489066362380981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,0,1,fp8,fp8,0,0.08086933195590973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,128,1,float16,float16,0,0.8528479735056559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,128,1,float16,fp8,0,0.8551680246988932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,0,1,float16,float16,0,1.2772106329600017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,128,1,fp8,fp8,0,0.7830346425374349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,0,1,float16,fp8,0,1.2751680215199788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,0,1,fp8,fp8,0,1.1722933451334636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,128,1,float16,float16,0,0.8733173211415609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,128,1,float16,fp8,0,0.8735520044962565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,0,1,float16,float16,0,1.291962703069051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,128,1,fp8,fp8,0,0.8012373447418213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,0,1,float16,fp8,0,1.2950666745503743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,128,1,float16,float16,0,0.892842690149943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,0,1,fp8,fp8,0,1.1871999899546306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,128,1,float16,fp8,0,0.890992005666097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,0,1,float16,float16,0,1.3124213218688965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,128,1,fp8,fp8,0,0.8171466986338297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,0,1,float16,fp8,0,1.3145333131154378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,128,1,float16,float16,0,0.9147786299387614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,0,1,fp8,fp8,0,1.2049973011016846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,0,1,float16,float16,0,1.3395147323608398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,128,1,float16,fp8,0,0.9200053215026855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,128,1,fp8,fp8,0,0.8543039957682291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,0,1,float16,fp8,0,1.3435200055440266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,128,1,float16,float16,0,0.4954613447189331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,0,1,float16,float16,0,0.7151839733123779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,0,1,fp8,fp8,0,1.2440693378448486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,128,1,float16,fp8,0,0.5013920068740845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,128,1,fp8,fp8,0,0.4761013189951579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,0,1,float16,fp8,0,0.7226453622182211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,128,1,float16,float16,0,0.42877864837646484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,0,1,fp8,fp8,0,0.6755572954813639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,0,1,float16,float16,0,0.6478506724039713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,128,1,fp8,fp8,0,0.4012800057729085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,128,1,float16,fp8,0,0.43219733238220215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,0,1,float16,fp8,0,0.6511093378067017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,0,1,fp8,fp8,0,0.6014080047607422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,128,1,float16,float16,0,0.43700798352559406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,0,1,float16,float16,0,0.6535893281300863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,128,1,float16,fp8,0,0.44225064913431805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,128,1,fp8,fp8,0,0.4108373324076335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,0,1,float16,fp8,0,0.6572373310724894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,0,1,fp8,fp8,0,0.6086133321126302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,128,1,float16,float16,0,0.44656534989674884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,0,1,float16,float16,0,0.6630933284759521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,128,1,float16,fp8,0,0.44861332575480145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,128,1,fp8,fp8,0,0.4185760021209717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,0,1,float16,fp8,0,0.6681919892628988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,128,1,float16,float16,0,0.45817601680755615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,0,1,fp8,fp8,0,0.6190453370412191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,0,1,float16,float16,0,0.6778133710225424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,128,1,float16,fp8,0,0.4663733243942261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,128,1,fp8,fp8,0,0.4350293477376302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,128,1,float16,float16,0,0.25887467463811237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,0,1,float16,fp8,0,0.6853386561075846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,0,1,float16,float16,0,0.37326931953430176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,0,1,fp8,fp8,0,0.6349920034408569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,128,1,float16,fp8,0,0.26310400168100995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,128,1,fp8,fp8,0,0.25092266003290814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,0,1,float16,fp8,0,0.37944531440734863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,128,1,float16,float16,0,0.22270933787027994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,0,1,fp8,fp8,0,0.35421331723531085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,128,1,float16,fp8,0,0.22363734245300293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,128,1,fp8,fp8,0,0.21175465981165567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,0,1,float16,float16,0,0.33586664994557697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,128,1,float16,float16,0,0.2246453364690145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,0,1,float16,fp8,0,0.3388479948043823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,0,1,fp8,fp8,0,0.3163253267606099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,128,1,float16,fp8,0,0.22839999198913574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,128,1,fp8,fp8,0,0.21684267123540243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,0,1,float16,float16,0,0.33814934889475506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,128,1,float16,float16,0,0.2317919929822286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,0,1,float16,fp8,0,0.34324268500010174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,0,1,fp8,fp8,0,0.3213493426640828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,128,1,fp8,fp8,0,0.2215893268585205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,128,1,float16,fp8,0,0.23432000478108725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,0,1,float16,float16,0,0.34513600667317706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,0,1,fp8,fp8,0,0.3263466755549113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,0,1,float16,fp8,0,0.348965326944987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,128,1,float16,float16,0,0.24009066820144653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,128,1,fp8,fp8,0,0.22946133216222128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,128,1,float16,fp8,0,0.244159996509552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,0,1,float16,float16,0,0.35512534777323407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,128,1,float16,float16,0,0.1384160021940867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,0,1,fp8,fp8,0,0.33447468280792236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,0,1,float16,fp8,0,0.3598826726277669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,0,1,float16,float16,0,0.20446399847666422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,128,1,float16,fp8,0,0.1416106621424357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,128,1,fp8,fp8,0,0.13806933164596558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,0,1,float16,fp8,0,0.20674665768941244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,128,1,float16,float16,0,0.11692800124486287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,0,1,fp8,fp8,0,0.1946186621983846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,128,1,float16,fp8,0,0.1179039975007375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,128,1,fp8,fp8,0,0.11078400413195293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,0,1,float16,float16,0,0.18162665764490762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,128,1,float16,float16,0,0.11868266264597575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,0,1,float16,fp8,0,0.18354666233062744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,0,1,fp8,fp8,0,0.1687999963760376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,128,1,float16,fp8,0,0.11975466211636861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,128,1,fp8,fp8,0,0.11435199777285258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,0,1,float16,float16,0,0.18200532595316568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,128,1,float16,float16,0,0.11995733777681987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,0,1,float16,fp8,0,0.1841813325881958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,0,1,fp8,fp8,0,0.1715679963429769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,128,1,float16,fp8,0,0.12191999951998393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,128,1,fp8,fp8,0,0.11955733100573222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,0,1,float16,float16,0,0.18470933039983115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,0,1,float16,fp8,0,0.1872160037358602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,128,1,float16,float16,0,0.12642133235931396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,0,1,fp8,fp8,0,0.17698132991790771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,128,1,float16,fp8,0,0.13005333145459494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,128,1,fp8,fp8,0,0.12795733412106833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,0,1,float16,float16,0,0.19267733891805014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,0,1,float16,fp8,0,0.19210133949915567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,128,1,float16,float16,0,0.07859200239181519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,0,1,float16,float16,0,0.11379200220108032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,128,1,float16,fp8,0,0.08045866588751475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,0,1,fp8,fp8,0,0.18515199422836304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,128,1,fp8,fp8,0,0.08091199894746144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,0,1,float16,fp8,0,0.11504532893498738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,0,1,fp8,fp8,0,0.11351466178894043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,128,1,float16,float16,0,0.07044800122578938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,0,1,float16,float16,0,0.10600533088048299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,128,1,float16,fp8,0,0.07242133220036824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,128,1,fp8,fp8,0,0.06843199829260509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,0,1,float16,fp8,0,0.10770666599273682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,0,1,fp8,fp8,0,0.10135466853777568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,128,1,float16,float16,0,0.07065066695213318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,0,1,float16,float16,0,0.10732266306877136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,128,1,float16,fp8,0,0.07292266686757405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,128,1,fp8,fp8,0,0.06846933563550313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,0,1,float16,fp8,0,0.10752532879511516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,128,1,float16,float16,0,0.07338666419188182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,0,1,fp8,fp8,0,0.10134933392206828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,0,1,float16,float16,0,0.10913599530855815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,128,1,float16,fp8,0,0.07436800003051758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,128,1,fp8,fp8,0,0.07018666466077168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,0,1,float16,fp8,0,0.1076800028483073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,0,1,fp8,fp8,0,0.1034879982471466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,128,1,float16,float16,0,0.07478400071461995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,0,1,float16,float16,0,0.11032533645629883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,128,1,float16,fp8,0,0.07461333274841309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,128,1,fp8,fp8,0,0.07116800049940745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,0,1,float16,fp8,0,0.10991467038790385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,128,1,float16,float16,0,0.04780266682306925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,0,1,fp8,fp8,0,0.1051626702149709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,0,1,float16,float16,0,0.0734559992949168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,128,1,float16,fp8,0,0.049728001157442726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,128,1,fp8,fp8,0,0.04788800080617269
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,0,1,float16,fp8,0,0.0745600014925003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,0,1,fp8,fp8,0,0.07223999996980031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,128,1,float16,float16,0,0.0461706668138504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,0,1,float16,float16,0,0.07227199772993724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,128,1,float16,fp8,0,0.04594666759173075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,128,1,fp8,fp8,0,0.04561600089073181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,0,1,float16,fp8,0,0.07267733414967854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,0,1,fp8,fp8,0,0.06654933094978333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,128,1,float16,float16,0,0.04578666885693868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,0,1,float16,float16,0,0.07111466427644093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,128,1,float16,fp8,0,0.047728002071380615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,128,1,fp8,fp8,0,0.04568000137805939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,0,1,float16,fp8,0,0.07127999762694041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,0,1,fp8,fp8,0,0.06859733164310455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,128,1,float16,float16,0,0.047695999344189964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,0,1,float16,float16,0,0.07060266534487407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,128,1,float16,fp8,0,0.04660266637802124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,128,1,fp8,fp8,0,0.04590400060017904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,0,1,float16,fp8,0,0.07285333176453908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,0,1,fp8,fp8,0,0.06853866577148438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,128,1,float16,float16,0,0.048063998421033226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,0,1,float16,float16,0,0.07258666555086772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,128,1,float16,fp8,0,0.04772266745567322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,128,1,fp8,fp8,0,0.045935998360315956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,0,1,float16,fp8,0,0.07460799813270569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,128,1,float16,float16,0,0.037658666570981346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,0,1,fp8,fp8,0,0.06879466772079468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,0,1,float16,float16,0,0.05428266525268555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,128,1,float16,fp8,0,0.039120001097520195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,128,1,fp8,fp8,0,0.03779733429352442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,0,1,float16,fp8,0,0.054511999090512596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,0,1,fp8,fp8,0,0.05354666709899902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,128,1,float16,float16,0,0.03770133356253306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,128,1,float16,fp8,0,0.039359999199708305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,0,1,float16,float16,0,0.05583466589450836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,128,1,fp8,fp8,0,0.0376800000667572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,0,1,float16,fp8,0,0.0557226687669754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,128,1,float16,float16,0,0.037733333806196846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,0,1,fp8,fp8,0,0.05223466455936432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,128,1,float16,fp8,0,0.037589333951473236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,0,1,float16,float16,0,0.05403733253479004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,128,1,fp8,fp8,0,0.03763733307520548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,128,1,float16,float16,0,0.037802666425704956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,0,1,float16,fp8,0,0.05411200225353241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,0,1,fp8,fp8,0,0.05209066470464071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,128,1,float16,fp8,0,0.03790933390458425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,128,1,fp8,fp8,0,0.03753600021203359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,0,1,float16,float16,0,0.05586666862169901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,0,1,float16,fp8,0,0.054133335749308266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,0,1,fp8,fp8,0,0.052517334620157875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,128,1,float16,float16,0,0.03799466788768768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,0,1,float16,float16,0,0.05409066875775655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,128,1,float16,fp8,0,0.03862933317820231
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,128,1,fp8,fp8,0,0.037647999823093414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,0,1,float16,fp8,0,0.05436266462008158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,0,1,fp8,fp8,0,0.053354665637016296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,128,1,float16,float16,0,0.6607466538747152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,128,1,float16,fp8,0,0.6627466678619385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,0,1,float16,float16,0,0.8754560152689616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,128,1,fp8,fp8,0,0.6089173158009847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,0,1,float16,fp8,0,0.8758719762166342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,0,1,fp8,fp8,0,0.8054880301157633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,128,1,float16,float16,0,0.6842559973398844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,128,1,float16,fp8,0,0.6852959791819254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,0,1,float16,float16,0,0.899679978688558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,128,1,fp8,fp8,0,0.621018648147583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,0,1,float16,fp8,0,0.8976159890492758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,0,1,fp8,fp8,0,0.8144000371297201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,128,1,float16,float16,0,0.7015626430511475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,0,1,float16,float16,0,0.9117919603983561
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,128,1,fp8,fp8,0,0.6372213363647461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,128,1,float16,fp8,0,0.6986880302429199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,0,1,float16,fp8,0,0.910645325978597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,0,1,fp8,fp8,0,0.8292160034179688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,128,1,float16,float16,0,0.7257226308186849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,0,1,float16,float16,0,0.9389066696166992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,128,1,float16,fp8,0,0.7165706952412924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,128,1,fp8,fp8,0,0.6592533191045126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,128,1,float16,float16,0,0.3888213237126668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,0,1,float16,fp8,0,0.9288372993469238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,0,1,float16,float16,0,0.500058650970459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,0,1,fp8,fp8,0,0.8531839847564697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,128,1,float16,fp8,0,0.3908799886703491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,128,1,fp8,fp8,0,0.3671040137608846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,0,1,float16,fp8,0,0.5017333428064982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,0,1,fp8,fp8,0,0.4699999888737996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,128,1,float16,float16,0,0.3314186731974284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,0,1,float16,float16,0,0.44306135177612305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,128,1,float16,fp8,0,0.3341066837310791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,128,1,fp8,fp8,0,0.3115946650505066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,0,1,float16,fp8,0,0.4420693318049113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,128,1,float16,float16,0,0.3371146519978841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,0,1,fp8,fp8,0,0.4124906857808431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,0,1,float16,float16,0,0.4467200040817261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,128,1,float16,fp8,0,0.3400426705678304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,128,1,fp8,fp8,0,0.3187093337376912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,0,1,float16,fp8,0,0.4500693480173747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,0,1,fp8,fp8,0,0.41790934403737384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,128,1,float16,float16,0,0.3457813262939453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,0,1,float16,float16,0,0.4562559922536214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,128,1,float16,fp8,0,0.3497920036315918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,128,1,fp8,fp8,0,0.3245760003725688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,0,1,float16,fp8,0,0.45878398418426514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,128,1,float16,float16,0,0.3569493293762207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,0,1,fp8,fp8,0,0.4246293306350708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,0,1,float16,float16,0,0.4689600070317586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,128,1,float16,fp8,0,0.360703984896342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,128,1,fp8,fp8,0,0.33505598704020184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,0,1,float16,fp8,0,0.4697173436482747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,128,1,float16,float16,0,0.2018666664759318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,0,1,fp8,fp8,0,0.43590935071309406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,0,1,float16,float16,0,0.2626720070838928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,128,1,float16,fp8,0,0.20402665932973227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,128,1,fp8,fp8,0,0.19506667057673135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,128,1,float16,float16,0,0.16912533839543661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,0,1,float16,fp8,0,0.26468799511591595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,0,1,fp8,fp8,0,0.24689066410064697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,128,1,float16,fp8,0,0.17110933860143027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,128,1,fp8,fp8,0,0.16485866904258728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,0,1,float16,float16,0,0.22678399085998535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,128,1,float16,float16,0,0.17086400588353476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,0,1,fp8,fp8,0,0.21793067455291748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,0,1,float16,fp8,0,0.22870399554570517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,128,1,fp8,fp8,0,0.16670932372411093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,128,1,float16,fp8,0,0.1727893352508545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,0,1,float16,float16,0,0.22977066040039062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,128,1,float16,float16,0,0.17747733990351358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,0,1,fp8,fp8,0,0.21990933020909628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,0,1,float16,fp8,0,0.23111466566721597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,128,1,float16,fp8,0,0.1789813240369161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,0,1,float16,float16,0,0.23828266064325967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,128,1,fp8,fp8,0,0.173199991385142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,128,1,float16,float16,0,0.1874026656150818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,0,1,float16,fp8,0,0.2365866700808207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,0,1,fp8,fp8,0,0.22713599602381387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,128,1,float16,fp8,0,0.1880319913228353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,128,1,fp8,fp8,0,0.1792693336804708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,0,1,float16,float16,0,0.24473067124684653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,128,1,float16,float16,0,0.10762666662534077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,0,1,fp8,fp8,0,0.23237866163253784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,0,1,float16,fp8,0,0.24827200174331665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,0,1,float16,float16,0,0.14057067036628723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,128,1,fp8,fp8,0,0.10946666200955708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,128,1,float16,fp8,0,0.11040000120798747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,0,1,float16,fp8,0,0.14222400387128195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,128,1,float16,float16,0,0.09322667121887207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,0,1,fp8,fp8,0,0.13868266344070435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,0,1,float16,float16,0,0.12542399764060974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,128,1,float16,fp8,0,0.0934933324654897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,128,1,fp8,fp8,0,0.08679999907811482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,0,1,fp8,fp8,0,0.11831466356913249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,0,1,float16,fp8,0,0.12601066629091898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,128,1,float16,float16,0,0.0929813285668691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,128,1,fp8,fp8,0,0.08695466319719951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,128,1,float16,fp8,0,0.09423999985059102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,0,1,float16,float16,0,0.1251359979311625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,0,1,float16,fp8,0,0.12611200412114462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,128,1,float16,float16,0,0.09523199995358785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,0,1,fp8,fp8,0,0.11731200416882832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,128,1,fp8,fp8,0,0.09072533249855042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,128,1,float16,fp8,0,0.09643200039863586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,0,1,float16,float16,0,0.1267359952131907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,128,1,float16,float16,0,0.09924266735712688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,0,1,fp8,fp8,0,0.12153599659601848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,0,1,float16,fp8,0,0.12934933106104532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,128,1,float16,fp8,0,0.1009226640065511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,128,1,fp8,fp8,0,0.09947733084360759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,0,1,float16,float16,0,0.13131733735402426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,128,1,float16,float16,0,0.06232533355553945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,0,1,float16,fp8,0,0.1322719951470693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,0,1,fp8,fp8,0,0.12990933656692505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,0,1,float16,float16,0,0.0809386670589447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,128,1,float16,fp8,0,0.0621066689491272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,128,1,fp8,fp8,0,0.061861331264177956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,128,1,float16,float16,0,0.05650666852792104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,0,1,float16,fp8,0,0.0835040012995402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,0,1,float16,float16,0,0.078575998544693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,0,1,fp8,fp8,0,0.07937600215276082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,128,1,float16,fp8,0,0.05801600217819214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,0,1,float16,fp8,0,0.07850133379300435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,0,1,fp8,fp8,0,0.0728959987560908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,128,1,fp8,fp8,0,0.05444266895453135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,128,1,float16,float16,0,0.058005332946777344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,0,1,float16,float16,0,0.07753066718578339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,128,1,float16,fp8,0,0.057722667853037514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,128,1,fp8,fp8,0,0.055813332398732506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,0,1,float16,fp8,0,0.07858133316040039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,128,1,float16,float16,0,0.05794133245944977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,0,1,fp8,fp8,0,0.0743146687746048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,0,1,float16,float16,0,0.07991466422875722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,128,1,float16,fp8,0,0.05798399945100149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,128,1,fp8,fp8,0,0.056101332108179726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,0,1,float16,fp8,0,0.07990399996439616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,128,1,float16,float16,0,0.05799466868241628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,0,1,fp8,fp8,0,0.07467199862003326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,0,1,float16,float16,0,0.07878933350245158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,128,1,float16,fp8,0,0.06069866816202799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,128,1,fp8,fp8,0,0.05797866483529409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,0,1,float16,fp8,0,0.08072000245253245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,0,1,fp8,fp8,0,0.07617599765459697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,128,1,float16,float16,0,0.043621331453323364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,0,1,float16,float16,0,0.05584533512592316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,128,1,float16,fp8,0,0.04385066529115041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,128,1,fp8,fp8,0,0.04194133480389913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,0,1,float16,fp8,0,0.05624000231424967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,0,1,fp8,fp8,0,0.052255998055140175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,128,1,float16,float16,0,0.041536000867684685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,0,1,float16,float16,0,0.052789335449536644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,128,1,float16,fp8,0,0.04144000013669332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,128,1,fp8,fp8,0,0.04140799989302953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,0,1,float16,fp8,0,0.05426666637261709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,0,1,fp8,fp8,0,0.05175999800364176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,128,1,float16,float16,0,0.04148799926042557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,0,1,float16,float16,0,0.053957333167394005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,128,1,float16,fp8,0,0.04190400242805481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,128,1,fp8,fp8,0,0.03958933303753535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,0,1,float16,fp8,0,0.053802669048309326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,0,1,fp8,fp8,0,0.0499893327554067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,128,1,float16,float16,0,0.04177066683769226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,0,1,float16,float16,0,0.05388266841570536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,128,1,float16,fp8,0,0.041493333876132965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,128,1,fp8,fp8,0,0.03982933362325033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,0,1,float16,fp8,0,0.05394133428732554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,0,1,fp8,fp8,0,0.05143466591835022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,128,1,float16,float16,0,0.0418453315893809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,0,1,float16,float16,0,0.05388266841570536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,128,1,float16,fp8,0,0.041797334949175514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,128,1,fp8,fp8,0,0.03977599988381068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,0,1,float16,fp8,0,0.05551999807357788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,0,1,fp8,fp8,0,0.05215999980767568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,128,1,float16,float16,0,0.03341866781314214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,0,1,float16,float16,0,0.04571199913819631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,128,1,float16,fp8,0,0.033786666889985405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,128,1,fp8,fp8,0,0.03347733368476232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,0,1,float16,fp8,0,0.045968001087506614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,0,1,fp8,fp8,0,0.04397333165009817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,128,1,float16,float16,0,0.03391999999682108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,0,1,float16,float16,0,0.04603200157483419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,128,1,fp8,fp8,0,0.033471999069054924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,128,1,float16,fp8,0,0.033610666791598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,0,1,float16,fp8,0,0.04362666606903076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,128,1,float16,float16,0,0.033402666449546814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,0,1,fp8,fp8,0,0.04192533095677694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,0,1,float16,float16,0,0.04363200068473816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,128,1,float16,fp8,0,0.033557333052158356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,128,1,fp8,fp8,0,0.03164266546567281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,0,1,float16,fp8,0,0.04377066592375437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,128,1,float16,float16,0,0.03347733368476232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,0,1,fp8,fp8,0,0.042117332418759666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,0,1,float16,float16,0,0.043893332282702126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,128,1,float16,fp8,0,0.03315199911594391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,128,1,fp8,fp8,0,0.03182933231194814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,0,1,float16,fp8,0,0.045781334241231285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,0,1,fp8,fp8,0,0.04370133578777313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,128,1,float16,float16,0,0.03363200028737386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,0,1,float16,float16,0,0.04573333263397217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,128,1,float16,fp8,0,0.033514666060606636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,128,1,fp8,fp8,0,0.03332266708215078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,0,1,float16,fp8,0,0.043765331308046974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,0,1,fp8,fp8,0,0.04381866753101349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,128,1,float16,float16,0,0.770634651184082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,128,1,float16,fp8,0,0.7651093006134033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,0,1,float16,float16,0,0.9075893561045328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,128,1,fp8,fp8,0,0.7163413365681967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,0,1,float16,fp8,0,0.8991573651631674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,0,1,fp8,fp8,0,0.8444639841715494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,128,1,float16,float16,0,0.7774559656778971
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,128,1,float16,fp8,0,0.7758986949920654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,0,1,float16,float16,0,0.9121493498484293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,128,1,fp8,fp8,0,0.7184213002522787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,0,1,fp8,fp8,0,0.8425599733988444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,128,1,float16,float16,0,0.7938880125681559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,0,1,float16,fp8,0,0.9149119853973389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,0,1,float16,float16,0,0.9308533668518066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,128,1,float16,fp8,0,0.7913173039754232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,128,1,fp8,fp8,0,0.8344586690266927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,0,1,float16,fp8,0,0.9314133326212565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,0,1,fp8,fp8,0,0.9569546381632487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,128,1,float16,float16,0,0.7749280134836832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,0,1,float16,float16,0,0.9110399881998698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,128,1,float16,fp8,0,0.7655786673227946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,128,1,fp8,fp8,0,0.8166399796803793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,128,1,float16,float16,0,0.4089653491973877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,0,1,float16,fp8,0,0.901962677637736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,0,1,fp8,fp8,0,0.946453332901001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,0,1,float16,float16,0,0.4813813368479411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,128,1,float16,fp8,0,0.4038399855295817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,128,1,fp8,fp8,0,0.4085119962692261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,0,1,float16,fp8,0,0.47354666392008465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,128,1,float16,float16,0,0.3967626492182414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,0,1,fp8,fp8,0,0.47362132867177326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,0,1,float16,float16,0,0.46634666124979657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,128,1,float16,fp8,0,0.3917866547902425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,128,1,fp8,fp8,0,0.365994652112325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,0,1,float16,fp8,0,0.4617600043614705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,0,1,fp8,fp8,0,0.42897601922353107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,128,1,float16,float16,0,0.3986026843388875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,0,1,float16,float16,0,0.4731253385543823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,128,1,fp8,fp8,0,0.36953067779541016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,128,1,float16,fp8,0,0.39662400881449383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,0,1,float16,fp8,0,0.46821868419647217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,128,1,float16,float16,0,0.4077013333638509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,0,1,fp8,fp8,0,0.4334719975789388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,0,1,float16,float16,0,0.4784160057703654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,128,1,float16,fp8,0,0.4055733283360799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,128,1,fp8,fp8,0,0.40753066539764404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,0,1,float16,fp8,0,0.47740264733632404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,0,1,fp8,fp8,0,0.46969600518544513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,128,1,float16,float16,0,0.396506667137146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,0,1,float16,float16,0,0.4692639907201131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,128,1,float16,fp8,0,0.3928533395131429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,128,1,fp8,fp8,0,0.40150400002797443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,128,1,float16,float16,0,0.21466133991877237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,0,1,float16,fp8,0,0.4631893237431844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,0,1,fp8,fp8,0,0.45956798394521076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,0,1,float16,float16,0,0.25386132796605426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,128,1,float16,fp8,0,0.21337066094080606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,128,1,fp8,fp8,0,0.2146186629931132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,0,1,fp8,fp8,0,0.24969067176183066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,0,1,float16,fp8,0,0.25009065866470337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,128,1,float16,float16,0,0.20781334241231283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,128,1,float16,fp8,0,0.20537600914637247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,128,1,fp8,fp8,0,0.19196800390879312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,0,1,float16,float16,0,0.24426132440567017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,0,1,fp8,fp8,0,0.22418133417765299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,0,1,float16,fp8,0,0.24335465828577676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,128,1,float16,float16,0,0.20878932873408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,128,1,float16,fp8,0,0.20828799406687418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,128,1,fp8,fp8,0,0.19318934281667074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,0,1,float16,float16,0,0.24812267223993936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,0,1,fp8,fp8,0,0.22685867547988892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,0,1,float16,fp8,0,0.2458666761716207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,128,1,float16,float16,0,0.21371199687321982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,128,1,float16,fp8,0,0.21383466323216757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,128,1,fp8,fp8,0,0.20629332462946573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,0,1,float16,float16,0,0.25174399216969806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,128,1,float16,float16,0,0.20901334285736084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,0,1,float16,fp8,0,0.2521866758664449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,0,1,fp8,fp8,0,0.2404693365097046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,128,1,float16,fp8,0,0.20770132541656494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,0,1,float16,float16,0,0.24716265996297201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,128,1,fp8,fp8,0,0.20374399423599243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,128,1,float16,float16,0,0.11829333504041036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,0,1,float16,fp8,0,0.2452053427696228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,0,1,fp8,fp8,0,0.23777065674463907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,0,1,float16,float16,0,0.14081600308418274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,128,1,float16,fp8,0,0.11678933103879292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,128,1,fp8,fp8,0,0.11769066254297893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,0,1,float16,fp8,0,0.13909332950909933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,0,1,fp8,fp8,0,0.13774399956067404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,128,1,float16,float16,0,0.10947733124097188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,0,1,float16,float16,0,0.1328213314215342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,128,1,float16,fp8,0,0.10936533411343892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,128,1,fp8,fp8,0,0.1032533347606659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,0,1,float16,fp8,0,0.13235200444857279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,128,1,float16,float16,0,0.11134399970372517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,0,1,fp8,fp8,0,0.12241066495577495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,0,1,float16,float16,0,0.13424000144004822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,128,1,float16,fp8,0,0.11132799585660298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,128,1,fp8,fp8,0,0.10705600182215373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,0,1,float16,fp8,0,0.13351999719937643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,128,1,float16,float16,0,0.11399466792742412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,0,1,fp8,fp8,0,0.12430933117866516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,128,1,float16,fp8,0,0.11567466457684834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,0,1,float16,float16,0,0.1379680037498474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,128,1,fp8,fp8,0,0.11061333616574605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,0,1,float16,fp8,0,0.13647466897964478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,0,1,fp8,fp8,0,0.1311893363793691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,0,1,float16,float16,0,0.13642133275667825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,128,1,float16,fp8,0,0.11329066753387451
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,128,1,float16,float16,0,0.11421333750089009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,128,1,fp8,fp8,0,0.11297067006429036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,0,1,float16,fp8,0,0.1349546710650126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,0,1,fp8,fp8,0,0.13311466574668884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,128,1,float16,float16,0,0.06538666784763336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,0,1,float16,float16,0,0.07868266602357228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,128,1,float16,fp8,0,0.06422933439413707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,128,1,fp8,fp8,0,0.0684853345155716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,0,1,float16,fp8,0,0.0786293347676595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,0,1,fp8,fp8,0,0.07909333209196727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,128,1,float16,float16,0,0.06418133278687795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,0,1,float16,float16,0,0.07669333120187123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,128,1,float16,fp8,0,0.06489600241184235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,128,1,fp8,fp8,0,0.06044266621271769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,0,1,float16,fp8,0,0.0764213353395462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,0,1,fp8,fp8,0,0.07212799787521362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,128,1,float16,float16,0,0.06429333488146464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,0,1,float16,float16,0,0.07664533456166585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,128,1,float16,fp8,0,0.06331199904282887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,128,1,fp8,fp8,0,0.0601440022389094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,0,1,float16,fp8,0,0.0766186664501826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,128,1,float16,float16,0,0.06448000172773997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,0,1,fp8,fp8,0,0.0726506660381953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,0,1,float16,float16,0,0.07684266567230225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,128,1,fp8,fp8,0,0.062277331948280334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,0,1,float16,fp8,0,0.07794133325417836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,128,1,float16,fp8,0,0.06481599807739258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,0,1,fp8,fp8,0,0.0743999977906545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,128,1,float16,float16,0,0.0634986658891042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,0,1,float16,float16,0,0.0773119976123174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,128,1,fp8,fp8,0,0.06428800026575725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,128,1,float16,fp8,0,0.06339199841022491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,0,1,float16,fp8,0,0.07643199960390727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,128,1,float16,float16,0,0.0415786678592364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,0,1,fp8,fp8,0,0.07539199789365132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,0,1,float16,float16,0,0.05051200091838837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,128,1,float16,fp8,0,0.041493333876132965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,128,1,fp8,fp8,0,0.04161066561937332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,0,1,float16,fp8,0,0.050704002380371094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,0,1,fp8,fp8,0,0.05022933085759481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,128,1,float16,float16,0,0.04159999887148539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,0,1,float16,float16,0,0.04971733192602793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,128,1,float16,fp8,0,0.039781334499518074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,128,1,fp8,fp8,0,0.03976533313592275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,0,1,float16,fp8,0,0.04995200037956238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,0,1,fp8,fp8,0,0.048010667165120445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,128,1,float16,float16,0,0.03975466638803482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,0,1,float16,float16,0,0.05027199784914652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,128,1,float16,fp8,0,0.04186133543650309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,128,1,fp8,fp8,0,0.039503999054431915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,0,1,float16,fp8,0,0.050111999114354454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,0,1,fp8,fp8,0,0.04850666721661886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,128,1,float16,float16,0,0.040175999204317726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,0,1,float16,float16,0,0.04987733562787374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,128,1,fp8,fp8,0,0.039850667119026184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,128,1,float16,fp8,0,0.04204800228277842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,0,1,float16,fp8,0,0.04969066878159841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,0,1,fp8,fp8,0,0.05002133548259735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,128,1,float16,float16,0,0.04159466673930486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,0,1,float16,float16,0,0.049685334165891014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,128,1,float16,fp8,0,0.04166933397452036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,128,1,fp8,fp8,0,0.04099733382463455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,0,1,float16,fp8,0,0.049957334995269775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,0,1,fp8,fp8,0,0.048122664292653404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,128,1,float16,float16,0,0.027535999814669292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,0,1,float16,float16,0,0.033573334415753685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,128,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,128,1,fp8,fp8,0,0.02759466568628947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,0,1,float16,fp8,0,0.033674667278925575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,0,1,fp8,fp8,0,0.033589333295822144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,128,1,float16,float16,0,0.027632000545660656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,0,1,float16,float16,0,0.03379733363787333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,128,1,float16,fp8,0,0.02722666660944621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,128,1,fp8,fp8,0,0.025407999753952026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,0,1,float16,fp8,0,0.033610666791598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,0,1,fp8,fp8,0,0.03161599983771642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,128,1,float16,float16,0,0.02714666724205017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,0,1,float16,float16,0,0.033215999603271484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,128,1,float16,fp8,0,0.02717866748571396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,128,1,fp8,fp8,0,0.025472000241279602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,0,1,float16,fp8,0,0.03367999941110611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,0,1,fp8,fp8,0,0.031290667752424874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,128,1,float16,float16,0,0.028736000259717304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,0,1,float16,float16,0,0.03333866596221924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,128,1,float16,fp8,0,0.02735999971628189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,128,1,fp8,fp8,0,0.027813332776228588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,0,1,float16,fp8,0,0.03331733246644338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,0,1,fp8,fp8,0,0.03134933362404505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,128,1,float16,float16,0,0.027535999814669292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,0,1,float16,float16,0,0.03365866591533025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,128,1,float16,fp8,0,0.027237333357334137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,128,1,fp8,fp8,0,0.02773333340883255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,0,1,float16,fp8,0,0.03344533344109853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,0,1,fp8,fp8,0,0.03365866591533025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,128,1,float16,float16,0,0.023503998915354412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,0,1,float16,float16,0,0.029504001140594482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,128,1,float16,fp8,0,0.0230880007147789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,128,1,fp8,fp8,0,0.023226665953795116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,0,1,float16,fp8,0,0.029167999823888142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,0,1,fp8,fp8,0,0.02924266705910365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,128,1,float16,float16,0,0.02317333221435547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,0,1,float16,float16,0,0.029466666281223297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,128,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,128,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,0,1,float16,fp8,0,0.029616000751654308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,0,1,fp8,fp8,0,0.02720533311367035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,128,1,float16,float16,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,0,1,float16,float16,0,0.029477333029111225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,128,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,128,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,0,1,float16,fp8,0,0.029466666281223297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,0,1,fp8,fp8,0,0.027637332677841187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,128,1,float16,float16,0,0.02350933353106181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,0,1,float16,float16,0,0.029477333029111225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,128,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,128,1,fp8,fp8,0,0.021898667017618816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,0,1,float16,fp8,0,0.029157333076000214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,0,1,fp8,fp8,0,0.027237333357334137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,128,1,float16,float16,0,0.023152001202106476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,128,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,0,1,float16,float16,0,0.02985599885384242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,0,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,128,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,0,1,fp8,fp8,0,0.029525332152843475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,128,1,float16,float16,0,0.7504106362660726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,128,1,float16,fp8,0,0.7462826569875082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,0,1,float16,float16,0,0.7616960207621256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,128,1,fp8,fp8,0,0.691093365351359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,0,1,float16,fp8,0,0.7549440066019694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,0,1,fp8,fp8,0,0.7052853107452393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,128,1,float16,float16,0,0.7579946517944336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,128,1,fp8,fp8,0,0.6995680332183838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,128,1,float16,fp8,0,0.7536160151163737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,0,1,float16,float16,0,0.7694453398386637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,0,1,float16,fp8,0,0.7641706466674805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,0,1,fp8,fp8,0,0.7071999708811442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,128,1,float16,float16,0,0.7747626304626465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,128,1,float16,fp8,0,0.7687520186106364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,0,1,float16,float16,0,0.7896373271942139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,128,1,fp8,fp8,0,0.8138773441314697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,0,1,float16,fp8,0,0.7816267013549805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,0,1,fp8,fp8,0,0.8291040261586508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,128,1,float16,float16,0,0.7549920082092285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,0,1,float16,float16,0,0.7636000315348307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,128,1,float16,fp8,0,0.7439359823862711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,128,1,fp8,fp8,0,0.7962133089701334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,0,1,float16,fp8,0,0.7537439664204916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,0,1,fp8,fp8,0,0.8082293669382731
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,128,1,float16,float16,0,0.39798398812611896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,0,1,float16,float16,0,0.4043946663538615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,128,1,float16,fp8,0,0.39242132504781085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,128,1,fp8,fp8,0,0.39931734402974445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,0,1,float16,fp8,0,0.3968106508255005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,0,1,fp8,fp8,0,0.40273066361745197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,128,1,float16,float16,0,0.3865813414255778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,0,1,float16,float16,0,0.3901333411534627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,128,1,float16,fp8,0,0.3836373488108317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,128,1,fp8,fp8,0,0.3550186554590861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,0,1,float16,fp8,0,0.3873973290125529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,0,1,fp8,fp8,0,0.3594026565551758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,128,1,float16,float16,0,0.3903520107269287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,0,1,float16,float16,0,0.39346134662628174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,128,1,float16,fp8,0,0.38598934809366864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,128,1,fp8,fp8,0,0.35973334312438965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,0,1,float16,fp8,0,0.39103468259175617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,0,1,fp8,fp8,0,0.36213334401448566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,0,1,float16,float16,0,0.4039146502812703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,128,1,float16,float16,0,0.396506667137146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,128,1,float16,fp8,0,0.3949600060780843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,128,1,fp8,fp8,0,0.39692266782124835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,0,1,float16,fp8,0,0.4010026852289836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,128,1,float16,float16,0,0.3858506679534912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,0,1,fp8,fp8,0,0.40212265650431317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,0,1,float16,float16,0,0.393120010693868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,128,1,float16,fp8,0,0.38275734583536786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,128,1,fp8,fp8,0,0.3890133301417033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,0,1,float16,fp8,0,0.3886880079905192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,0,1,fp8,fp8,0,0.39487465222676593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,128,1,float16,float16,0,0.21022399266560873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,0,1,float16,float16,0,0.21371734142303467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,128,1,float16,fp8,0,0.20643200476964316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,128,1,fp8,fp8,0,0.2095200022061666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,0,1,float16,fp8,0,0.21050665775934854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,0,1,fp8,fp8,0,0.21134400367736816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,128,1,float16,float16,0,0.20241600275039673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,0,1,float16,float16,0,0.20442134141921997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,128,1,float16,fp8,0,0.20123199621836343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,128,1,fp8,fp8,0,0.185370663801829
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,0,1,float16,fp8,0,0.20333333810170492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,0,1,fp8,fp8,0,0.18877865870793661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,128,1,float16,float16,0,0.20492800076802573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,0,1,float16,float16,0,0.2053226629892985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,128,1,fp8,fp8,0,0.18794665733973184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,128,1,float16,fp8,0,0.20288532972335815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,0,1,float16,fp8,0,0.2060533364613851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,0,1,fp8,fp8,0,0.19126933813095093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,128,1,float16,float16,0,0.2076639930407206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,0,1,float16,float16,0,0.20994667212168375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,128,1,float16,fp8,0,0.2076479991277059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,128,1,fp8,fp8,0,0.20067733526229858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,0,1,float16,fp8,0,0.21039466063181558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,0,1,fp8,fp8,0,0.20255466302235922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,0,1,float16,float16,0,0.20637865861256918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,128,1,float16,float16,0,0.20269866784413657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,128,1,float16,fp8,0,0.20310399929682413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,128,1,fp8,fp8,0,0.2000853419303894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,0,1,float16,fp8,0,0.20510399341583252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,0,1,fp8,fp8,0,0.20125333468119302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,128,1,float16,float16,0,0.1163200040658315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,0,1,float16,float16,0,0.11813867092132568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,128,1,float16,fp8,0,0.11402133107185364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,128,1,fp8,fp8,0,0.11731732885042827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,0,1,float16,fp8,0,0.11672000090281169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,0,1,fp8,fp8,0,0.11735467116038005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,128,1,float16,float16,0,0.10749866565068562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,0,1,float16,float16,0,0.10948266585667928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,128,1,float16,fp8,0,0.10872000455856323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,128,1,fp8,fp8,0,0.1018293301264445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,0,1,float16,fp8,0,0.10975466171900432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,0,1,fp8,fp8,0,0.10290132959683736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,128,1,float16,float16,0,0.10966400305430095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,0,1,float16,float16,0,0.1127306620279948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,128,1,float16,fp8,0,0.10931199789047241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,128,1,fp8,fp8,0,0.10322667161623637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,0,1,float16,fp8,0,0.11265066266059875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,0,1,fp8,fp8,0,0.10654933253924052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,128,1,float16,float16,0,0.11286399761835735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,0,1,float16,float16,0,0.11375466982523601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,128,1,float16,fp8,0,0.11274666587511699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,128,1,fp8,fp8,0,0.11050132910410564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,0,1,float16,fp8,0,0.11327466368675232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,0,1,fp8,fp8,0,0.11187733213106792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,128,1,float16,float16,0,0.11218667030334473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,0,1,float16,float16,0,0.11532266934712727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,128,1,float16,fp8,0,0.10936533411343892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,128,1,fp8,fp8,0,0.10969066619873047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,128,1,float16,float16,0,0.06385066608587901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,0,1,float16,fp8,0,0.11344533165295918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,0,1,fp8,fp8,0,0.11158933242162068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,0,1,float16,float16,0,0.0649599979321162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,128,1,float16,fp8,0,0.06222933530807495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,128,1,fp8,fp8,0,0.06609066824118297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,0,1,float16,fp8,0,0.0652159998814265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,0,1,fp8,fp8,0,0.06638399759928386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,128,1,float16,float16,0,0.06211199859778086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,0,1,float16,float16,0,0.06256533165772755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,128,1,float16,fp8,0,0.062368000547091164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,128,1,fp8,fp8,0,0.058448001742362976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,0,1,float16,fp8,0,0.06221333146095276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,0,1,fp8,fp8,0,0.05813866853713989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,128,1,float16,float16,0,0.062181333700815834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,0,1,float16,float16,0,0.06227200229962667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,128,1,float16,fp8,0,0.06198933223883311
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,128,1,fp8,fp8,0,0.06025066475073496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,0,1,float16,fp8,0,0.062309334675470986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,0,1,fp8,fp8,0,0.060218666990598045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,128,1,float16,float16,0,0.06443200012048085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,0,1,float16,float16,0,0.06414933502674103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,128,1,float16,fp8,0,0.06417066852251689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,128,1,fp8,fp8,0,0.06121600170930227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,0,1,float16,fp8,0,0.06459733347098033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,0,1,fp8,fp8,0,0.0624533345301946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,128,1,float16,float16,0,0.06235733131567637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,0,1,float16,float16,0,0.06226666768391927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,128,1,float16,fp8,0,0.062224000692367554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,128,1,fp8,fp8,0,0.062319998939832054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,0,1,float16,fp8,0,0.062368000547091164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,0,1,fp8,fp8,0,0.06224533418814341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,128,1,float16,float16,0,0.04195733368396759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,0,1,float16,float16,0,0.04205866654713949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,128,1,float16,fp8,0,0.03985599925120672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,128,1,fp8,fp8,0,0.04161600023508072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,0,1,float16,fp8,0,0.04370133578777313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,0,1,fp8,fp8,0,0.04297066728274027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,128,1,float16,float16,0,0.041349334021409355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,0,1,float16,float16,0,0.04155733436346054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,128,1,float16,fp8,0,0.04028266668319702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,128,1,fp8,fp8,0,0.038831998904546104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,0,1,float16,fp8,0,0.04196266829967499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,0,1,fp8,fp8,0,0.03980266551176707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,128,1,float16,float16,0,0.03962666789690653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,0,1,float16,float16,0,0.041690667470296226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,128,1,float16,fp8,0,0.04008533308903376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,128,1,fp8,fp8,0,0.03956799954175949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,0,1,float16,fp8,0,0.041562666495641075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,0,1,fp8,fp8,0,0.039749334255854286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,128,1,float16,float16,0,0.04049066702524821
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,0,1,float16,float16,0,0.04387199878692627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,128,1,float16,fp8,0,0.040991999208927155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,128,1,fp8,fp8,0,0.04196266829967499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,0,1,float16,fp8,0,0.04363200068473816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,0,1,fp8,fp8,0,0.04162666698296865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,128,1,float16,float16,0,0.0417546679576238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,0,1,float16,float16,0,0.04191466669241587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,128,1,float16,fp8,0,0.04167999823888143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,128,1,fp8,fp8,0,0.0418453315893809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,0,1,float16,fp8,0,0.042021334171295166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,0,1,fp8,fp8,0,0.041840001940727234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,128,1,float16,float16,0,0.0276853342851003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,0,1,float16,float16,0,0.027482666075229645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,128,1,float16,fp8,0,0.027765333652496338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,128,1,fp8,fp8,0,0.02738133321205775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,0,1,float16,fp8,0,0.027530667682488758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,0,1,fp8,fp8,0,0.027424000203609467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,128,1,float16,float16,0,0.02548266698916753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,0,1,float16,float16,0,0.02757866680622101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,128,1,float16,fp8,0,0.025472000241279602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,128,1,fp8,fp8,0,0.02738133321205775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,0,1,float16,fp8,0,0.027509334186712902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,0,1,fp8,fp8,0,0.025594666600227356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,128,1,float16,float16,0,0.027280000348885853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,0,1,float16,float16,0,0.027290667096773785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,128,1,float16,fp8,0,0.026501332720120747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,128,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,0,1,float16,fp8,0,0.027637332677841187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,0,1,fp8,fp8,0,0.027280000348885853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,128,1,float16,float16,0,0.027386667827765148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,0,1,float16,float16,0,0.02758399893840154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,128,1,float16,fp8,0,0.02714666724205017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,128,1,fp8,fp8,0,0.02752000093460083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,0,1,float16,fp8,0,0.02846933404604594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,0,1,fp8,fp8,0,0.02736533433198929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,128,1,float16,float16,0,0.02714666724205017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,0,1,float16,float16,0,0.028277332584063213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,128,1,float16,fp8,0,0.027509334186712902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,128,1,fp8,fp8,0,0.02757333219051361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,0,1,float16,fp8,0,0.029002666473388672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,0,1,fp8,fp8,0,0.027535999814669292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,128,1,float16,float16,0,0.02332799881696701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,0,1,float16,float16,0,0.023311999936898548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,128,1,float16,fp8,0,0.02346133440732956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,128,1,fp8,fp8,0,0.023317334552605946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,0,1,float16,fp8,0,0.02330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,0,1,fp8,fp8,0,0.02369600037733714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,128,1,float16,float16,0,0.02309866746266683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,0,1,float16,float16,0,0.023237332701683044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,128,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,128,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,0,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,0,1,fp8,fp8,0,0.02254933367172877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,128,1,float16,float16,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,0,1,float16,float16,0,0.02329600105683009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,128,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,128,1,float16,fp8,0,0.023552000522613525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,0,1,float16,fp8,0,0.023050665855407715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,0,1,fp8,fp8,0,0.02348266790310542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,0,1,float16,float16,0,0.023215999205907185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,128,1,float16,float16,0,0.023557332654794056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,128,1,float16,fp8,0,0.023503998915354412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,128,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,0,1,float16,fp8,0,0.023941333095232647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,0,1,fp8,fp8,0,0.023082666099071503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,128,1,float16,float16,0,0.02309866746266683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,0,1,float16,float16,0,0.023557332654794056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,128,1,float16,fp8,0,0.023311999936898548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,128,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,0,1,fp8,fp8,0,0.023413332800070446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,0,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,128,1,float16,float16,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,0,1,float16,float16,0,0.02295999974012375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,128,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,128,1,float16,fp8,0,0.023397333920001984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,0,1,float16,fp8,0,0.02163200080394745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,0,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,128,1,float16,float16,0,0.021541332205136616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,0,1,float16,float16,0,0.021781332790851593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,128,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,128,1,fp8,fp8,0,0.021509334444999695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,0,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,0,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,128,1,float16,float16,0,0.021664001047611237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,0,1,float16,float16,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,128,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,0,1,float16,fp8,0,0.0230880007147789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,128,1,fp8,fp8,0,0.021418665846188862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,0,1,fp8,fp8,0,0.021018666525681812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,128,1,float16,float16,0,0.021370666722456615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,128,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,0,1,float16,float16,0,0.023269332945346832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,0,1,float16,fp8,0,0.02147199958562851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,128,1,fp8,fp8,0,0.021568000316619873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,0,1,fp8,fp8,0,0.021903999149799347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,128,1,float16,float16,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,0,1,float16,float16,0,0.02319466571013133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,128,1,float16,fp8,0,0.021669333179791767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,128,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,0,1,float16,fp8,0,0.021370666722456615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,0,1,fp8,fp8,0,0.02162666618824005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,128,1,float16,float16,0,0.3543093204498291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,0,1,float16,float16,0,0.34643733501434326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,128,1,float16,fp8,0,0.3508960008621216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,128,1,fp8,fp8,0,0.3272426724433899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,0,1,float16,fp8,0,0.3410186767578125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,0,1,fp8,fp8,0,0.31806933879852295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,128,1,float16,float16,0,0.3601280053456624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,0,1,float16,float16,0,0.35334400335947674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,128,1,float16,fp8,0,0.3592960039774577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,128,1,fp8,fp8,0,0.3307200074195862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,0,1,float16,fp8,0,0.35119465986887616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,0,1,fp8,fp8,0,0.3245493372281392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,128,1,float16,float16,0,0.37119468053181964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,0,1,float16,float16,0,0.36602667967478436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,128,1,float16,fp8,0,0.36974934736887616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,128,1,fp8,fp8,0,0.3684053421020508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,0,1,float16,fp8,0,0.36101865768432617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,128,1,float16,float16,0,0.3621600071589152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,0,1,fp8,fp8,0,0.35944000879923504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,0,1,float16,float16,0,0.3526666561762492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,128,1,float16,fp8,0,0.35558398564656574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,128,1,fp8,fp8,0,0.3603626489639282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,0,1,float16,fp8,0,0.35050666332244873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,128,1,float16,float16,0,0.1974560022354126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,0,1,fp8,fp8,0,0.3582506577173869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,0,1,float16,float16,0,0.19340799252192178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,128,1,float16,fp8,0,0.19486399491628012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,128,1,fp8,fp8,0,0.19660266240437826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,0,1,float16,fp8,0,0.1883466641108195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,0,1,fp8,fp8,0,0.1935466726620992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,128,1,float16,float16,0,0.18761066595713297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,0,1,float16,float16,0,0.18336532513300577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,128,1,float16,fp8,0,0.18354666233062744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,128,1,fp8,fp8,0,0.1746293306350708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,0,1,fp8,fp8,0,0.16879467169443765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,0,1,float16,fp8,0,0.18101867039998373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,128,1,float16,float16,0,0.18930133183797201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,0,1,float16,float16,0,0.1860533356666565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,128,1,float16,fp8,0,0.18733332554499307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,128,1,fp8,fp8,0,0.1750133236249288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,0,1,float16,fp8,0,0.1844266653060913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,0,1,fp8,fp8,0,0.17112000783284506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,128,1,float16,float16,0,0.1952106753985087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,0,1,float16,float16,0,0.19150400161743164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,128,1,float16,fp8,0,0.19391467173894247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,128,1,fp8,fp8,0,0.18621333440144858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,0,1,float16,fp8,0,0.18991466363271078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,0,1,fp8,fp8,0,0.1817973256111145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,128,1,float16,float16,0,0.18995199600855509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,0,1,float16,float16,0,0.18703466653823853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,128,1,float16,fp8,0,0.18795732657114664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,128,1,fp8,fp8,0,0.18851733207702637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,0,1,float16,fp8,0,0.1839039921760559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,0,1,float16,float16,0,0.1055466632048289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,128,1,float16,float16,0,0.10858133435249329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,0,1,fp8,fp8,0,0.1835626761118571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,128,1,float16,fp8,0,0.10659733414649963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,128,1,fp8,fp8,0,0.10954667131106059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,0,1,float16,fp8,0,0.10557867089907329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,0,1,fp8,fp8,0,0.10711466272672017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,128,1,float16,float16,0,0.09941866993904114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,0,1,float16,float16,0,0.09736532966295879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,128,1,float16,fp8,0,0.0990666647752126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,128,1,fp8,fp8,0,0.09321066737174988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,0,1,float16,fp8,0,0.09734400113423665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,0,1,fp8,fp8,0,0.09087999661763509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,128,1,float16,float16,0,0.10126933455467224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,0,1,float16,float16,0,0.09921600421269734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,128,1,float16,fp8,0,0.10034132997194926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,128,1,fp8,fp8,0,0.09524266918500264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,0,1,float16,fp8,0,0.09914666414260864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,0,1,fp8,fp8,0,0.09307199716567993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,128,1,float16,float16,0,0.10518933335940044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,0,1,float16,float16,0,0.10308800141016643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,128,1,float16,fp8,0,0.10346133510271709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,128,1,fp8,fp8,0,0.10266133149464925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,0,1,float16,fp8,0,0.10245866576830547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,0,1,fp8,fp8,0,0.09994666775067647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,128,1,float16,float16,0,0.10409067074457805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,0,1,float16,float16,0,0.10134399930636089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,128,1,float16,fp8,0,0.10106133421262105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,128,1,fp8,fp8,0,0.1032533347606659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,0,1,float16,fp8,0,0.09954667091369629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,0,1,fp8,fp8,0,0.09973866740862529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,128,1,float16,float16,0,0.06036800146102905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,0,1,float16,float16,0,0.05840533475081126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,128,1,float16,fp8,0,0.059989333152770996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,0,1,float16,fp8,0,0.05835733314355215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,128,1,fp8,fp8,0,0.06252799928188324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,0,1,fp8,fp8,0,0.06230400005976359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,0,1,float16,float16,0,0.057946667075157166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,128,1,float16,float16,0,0.05840533475081126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,128,1,float16,fp8,0,0.05808533231417338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,128,1,fp8,fp8,0,0.05584000051021576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,0,1,float16,fp8,0,0.05716800192991892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,0,1,fp8,fp8,0,0.05409599840641022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,128,1,float16,float16,0,0.0580320010582606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,0,1,float16,float16,0,0.058575997749964394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,128,1,float16,fp8,0,0.05830933153629303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,128,1,fp8,fp8,0,0.05615466833114624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,0,1,float16,fp8,0,0.0581279993057251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,0,1,fp8,fp8,0,0.053770666321118675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,128,1,float16,float16,0,0.060090666015942894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,0,1,float16,float16,0,0.05821333328882853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,128,1,float16,fp8,0,0.05845866600672404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,128,1,fp8,fp8,0,0.05793599784374237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,0,1,float16,fp8,0,0.05836800237496694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,0,1,fp8,fp8,0,0.05592533449331919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,128,1,float16,float16,0,0.06005866825580597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,0,1,float16,float16,0,0.05667733152707418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,128,1,float16,fp8,0,0.058117335041364036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,128,1,fp8,fp8,0,0.05874133110046387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,0,1,float16,fp8,0,0.05639466643333435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,0,1,fp8,fp8,0,0.05686399837334951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,128,1,float16,float16,0,0.03781333317359289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,0,1,float16,float16,0,0.0379573330283165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,128,1,float16,fp8,0,0.03774933268626531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,128,1,fp8,fp8,0,0.0395359992980957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,0,1,float16,fp8,0,0.03760000069936117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,0,1,fp8,fp8,0,0.037477334340413414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,128,1,float16,float16,0,0.037903999288876854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,128,1,float16,fp8,0,0.03772266705830892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,0,1,float16,float16,0,0.03585066646337509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,128,1,fp8,fp8,0,0.03586133321126302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,0,1,float16,fp8,0,0.03572266548871994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,0,1,fp8,fp8,0,0.03550933301448822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,0,1,float16,float16,0,0.0364479993780454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,128,1,float16,float16,0,0.0383146678407987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,128,1,float16,fp8,0,0.03763733307520548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,128,1,fp8,fp8,0,0.037647999823093414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,0,1,float16,fp8,0,0.03604800005753835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,128,1,float16,float16,0,0.03930133332808813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,0,1,float16,float16,0,0.037690666814645134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,0,1,fp8,fp8,0,0.035717333356539406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,128,1,float16,fp8,0,0.03962666789690653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,128,1,fp8,fp8,0,0.037445334096749626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,0,1,float16,fp8,0,0.03805333375930786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,0,1,fp8,fp8,0,0.037578667203585304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,128,1,float16,float16,0,0.037589333951473236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,0,1,float16,float16,0,0.035418666899204254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,128,1,float16,fp8,0,0.03772266705830892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,128,1,fp8,fp8,0,0.03833599885304769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,0,1,float16,fp8,0,0.03647999962170919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,0,1,fp8,fp8,0,0.03738133360942205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,128,1,float16,float16,0,0.025311999022960663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,0,1,float16,float16,0,0.02534399926662445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,128,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,128,1,fp8,fp8,0,0.025077333052953083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,0,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,0,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,128,1,float16,float16,0,0.02534399926662445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,0,1,float16,float16,0,0.025066666305065155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,128,1,float16,fp8,0,0.02537599951028824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,128,1,fp8,fp8,0,0.025226667523384094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,0,1,float16,fp8,0,0.025279998779296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,0,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,128,1,float16,float16,0,0.025498665869235992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,0,1,float16,float16,0,0.025311999022960663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,128,1,float16,fp8,0,0.02573866645495097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,128,1,fp8,fp8,0,0.02518933266401291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,0,1,float16,fp8,0,0.025392000873883564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,0,1,fp8,fp8,0,0.025536000728607178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,128,1,float16,float16,0,0.025445332129796345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,0,1,float16,float16,0,0.025392000873883564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,128,1,float16,fp8,0,0.025583999852339428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,128,1,fp8,fp8,0,0.025477332373460133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,0,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,0,1,fp8,fp8,0,0.025578667720158894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,128,1,float16,float16,0,0.025493333737055462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,0,1,float16,float16,0,0.02535466601451238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,128,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,128,1,fp8,fp8,0,0.025605333348115284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,0,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,0,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,128,1,float16,float16,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,0,1,float16,float16,0,0.021418665846188862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,128,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,128,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,128,1,float16,float16,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,0,1,float16,float16,0,0.01971199984351794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,128,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,128,1,fp8,fp8,0,0.02109333376089732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,0,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,0,1,fp8,fp8,0,0.019402666638294857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,128,1,float16,float16,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,0,1,float16,float16,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,128,1,float16,fp8,0,0.020960000654061634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,128,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,0,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,128,1,float16,float16,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,128,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,128,1,fp8,fp8,0,0.021685334543387096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,0,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,0,1,fp8,fp8,0,0.020848001043001812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,128,1,float16,float16,0,0.02141333371400833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,128,1,float16,fp8,0,0.021040000021457672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,128,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,0,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,0,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,128,1,float16,float16,0,0.02090666691462199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,0,1,float16,float16,0,0.019120000302791595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,128,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,128,1,fp8,fp8,0,0.019509332875410717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,0,1,float16,fp8,0,0.019386666516462963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,128,1,float16,float16,0,0.01941866676012675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,0,1,float16,float16,0,0.019167999426523846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,128,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,128,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,0,1,float16,fp8,0,0.019482667247454327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,128,1,float16,float16,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,0,1,float16,float16,0,0.01903466631968816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,128,1,float16,fp8,0,0.019413333386182785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,128,1,fp8,fp8,0,0.01966933285196622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,0,1,float16,fp8,0,0.019424000134070713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,0,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,128,1,float16,float16,0,0.020960000654061634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,0,1,float16,float16,0,0.019413333386182785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,128,1,float16,fp8,0,0.02128000060717265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,128,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,128,1,float16,float16,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,128,1,float16,fp8,0,0.021488000949223835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,128,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,0,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,0,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,128,1,float16,float16,0,0.01958400011062622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,0,1,float16,float16,0,0.01807466646035512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,128,1,float16,fp8,0,0.019381333142518997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,128,1,fp8,fp8,0,0.019386666516462963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,128,1,float16,float16,0,0.019487999379634857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,128,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,128,1,fp8,fp8,0,0.017829333742459614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,0,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,0,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,128,1,float16,float16,0,0.019343999524911244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,0,1,float16,float16,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,128,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,128,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,0,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,0,1,fp8,fp8,0,0.018101333330074947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,0,1,float16,float16,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,128,1,float16,float16,0,0.01969066634774208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,128,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,128,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,0,1,float16,fp8,0,0.018837332725524902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,128,1,float16,float16,0,0.01915733392039935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,128,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,0,1,float16,float16,0,0.019999999552965164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,128,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,0,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,0,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,128,1,float16,float16,0,0.19690134127934775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,0,1,float16,float16,0,0.19702933231989542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,128,1,float16,fp8,0,0.19405867656071982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,128,1,fp8,fp8,0,0.18736000855763754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,0,1,float16,fp8,0,0.19409066438674927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,0,1,fp8,fp8,0,0.18754132588704428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,128,1,float16,float16,0,0.19763733943303427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,0,1,float16,float16,0,0.19715199867884317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,128,1,float16,fp8,0,0.1962239940961202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,128,1,fp8,fp8,0,0.18018666903177896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,0,1,float16,fp8,0,0.1951520045598348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,0,1,fp8,fp8,0,0.18172800540924072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,128,1,float16,float16,0,0.20169599850972494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,0,1,float16,float16,0,0.20053333044052124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,128,1,float16,fp8,0,0.1980746587117513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,128,1,fp8,fp8,0,0.19451733430226645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,0,1,float16,fp8,0,0.1976213256518046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,0,1,fp8,fp8,0,0.1941866676012675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,128,1,float16,float16,0,0.19960000117619833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,0,1,float16,float16,0,0.20094400644302368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,128,1,float16,fp8,0,0.19819732507069907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,128,1,fp8,fp8,0,0.1929759979248047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,0,1,float16,fp8,0,0.19977066914240518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,128,1,float16,float16,0,0.11155733466148376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,0,1,float16,float16,0,0.1113973359266917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,0,1,fp8,fp8,0,0.19352000951766968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,128,1,float16,fp8,0,0.10987733801205952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,128,1,fp8,fp8,0,0.11241599917411804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,0,1,float16,fp8,0,0.11055999994277954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,0,1,fp8,fp8,0,0.11209066708882649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,128,1,float16,float16,0,0.10523733496665955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,0,1,float16,float16,0,0.10556800166765849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,128,1,float16,fp8,0,0.10521599650382996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,128,1,fp8,fp8,0,0.09967999656995137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,0,1,float16,fp8,0,0.10531733433405559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,0,1,fp8,fp8,0,0.09915733337402344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,128,1,float16,float16,0,0.107424000898997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,0,1,float16,float16,0,0.10758399963378906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,128,1,float16,fp8,0,0.10533333818117778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,128,1,fp8,fp8,0,0.09963200489679973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,0,1,fp8,fp8,0,0.10045866171518962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,0,1,float16,fp8,0,0.10567466417948405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,128,1,float16,float16,0,0.1074186662832896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,0,1,float16,float16,0,0.10802132884661357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,128,1,float16,fp8,0,0.10785067081451416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,0,1,float16,fp8,0,0.10697600245475769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,128,1,fp8,fp8,0,0.10733866691589355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,0,1,fp8,fp8,0,0.10543466607729594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,128,1,float16,float16,0,0.10943466424942017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,0,1,float16,float16,0,0.10834667086601257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,128,1,fp8,fp8,0,0.10884799559911092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,128,1,float16,fp8,0,0.11027200023333232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,0,1,float16,fp8,0,0.10804800192515056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,0,1,fp8,fp8,0,0.10628799597422282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,128,1,float16,float16,0,0.06035733222961426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,0,1,float16,float16,0,0.06271466612815857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,128,1,float16,fp8,0,0.06030400097370148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,128,1,fp8,fp8,0,0.06300266583760579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,0,1,float16,fp8,0,0.060746664802233376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,128,1,float16,float16,0,0.05996266504128774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,0,1,fp8,fp8,0,0.06262933214505513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,0,1,float16,float16,0,0.05834133426348368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,128,1,float16,fp8,0,0.05829866727193197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,128,1,fp8,fp8,0,0.05516799787680308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,0,1,float16,fp8,0,0.058746665716171265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,0,1,fp8,fp8,0,0.056143999099731445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,128,1,float16,float16,0,0.058277333776156105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,0,1,float16,float16,0,0.058362667759259544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,128,1,fp8,fp8,0,0.055919999877611794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,128,1,float16,fp8,0,0.05830933153629303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,0,1,float16,fp8,0,0.058090666929880776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,0,1,fp8,fp8,0,0.05605866511662801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,128,1,float16,float16,0,0.05994666616121928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,128,1,float16,fp8,0,0.060415998101234436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,0,1,float16,float16,0,0.06021333237489065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,128,1,fp8,fp8,0,0.05799466868241628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,0,1,float16,fp8,0,0.05993066728115082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,0,1,fp8,fp8,0,0.05829333265622457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,128,1,float16,float16,0,0.060234665870666504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,0,1,float16,float16,0,0.0603413333495458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,128,1,float16,fp8,0,0.06000000238418579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,128,1,fp8,fp8,0,0.05936533212661743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,0,1,float16,fp8,0,0.058490668733914696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,0,1,fp8,fp8,0,0.059562668204307556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,128,1,float16,float16,0,0.03765333443880081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,0,1,float16,float16,0,0.0394400010506312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,128,1,float16,fp8,0,0.03909866760174433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,128,1,fp8,fp8,0,0.03978666663169861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,0,1,float16,fp8,0,0.038245332737763725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,0,1,fp8,fp8,0,0.03905066599448522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,128,1,float16,float16,0,0.039493332306543984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,0,1,float16,float16,0,0.0378506655494372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,128,1,float16,fp8,0,0.03782399992148081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,128,1,fp8,fp8,0,0.03710933278004328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,0,1,float16,fp8,0,0.037802666425704956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,0,1,fp8,fp8,0,0.038015998899936676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,128,1,float16,float16,0,0.037871999045213066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,0,1,float16,float16,0,0.03982399900754293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,128,1,fp8,fp8,0,0.037690666814645134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,128,1,float16,fp8,0,0.03987200061480204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,0,1,float16,fp8,0,0.03939733405907949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,0,1,fp8,fp8,0,0.037808001041412354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,0,1,float16,float16,0,0.039674667020638786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,128,1,float16,float16,0,0.03965333352486292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,128,1,float16,fp8,0,0.039749334255854286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,128,1,fp8,fp8,0,0.039488000174363456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,0,1,float16,fp8,0,0.03972800076007843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,0,1,fp8,fp8,0,0.03942399968703588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,128,1,float16,float16,0,0.0377866675456365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,0,1,float16,float16,0,0.03953066716591517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,128,1,float16,fp8,0,0.03835200021664301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,128,1,fp8,fp8,0,0.037615999579429626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,0,1,float16,fp8,0,0.03969600051641464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,128,1,float16,float16,0,0.025418666501839954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,0,1,fp8,fp8,0,0.03790933390458425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,0,1,float16,float16,0,0.027488000690937042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,128,1,float16,fp8,0,0.025279998779296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,128,1,fp8,fp8,0,0.02741866558790207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,0,1,float16,fp8,0,0.02731200059254964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,0,1,fp8,fp8,0,0.027237333357334137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,128,1,float16,float16,0,0.027087998886903126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,0,1,float16,float16,0,0.025472000241279602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,128,1,fp8,fp8,0,0.02554133286078771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,128,1,float16,fp8,0,0.025626666843891144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,0,1,float16,fp8,0,0.02533866713444392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,0,1,fp8,fp8,0,0.025775998830795288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,128,1,float16,float16,0,0.02533866713444392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,0,1,float16,float16,0,0.0290133332212766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,128,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,128,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,0,1,float16,fp8,0,0.027376001079877216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,0,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,128,1,float16,float16,0,0.027189334233601887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,0,1,float16,float16,0,0.025450666745503742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,128,1,float16,fp8,0,0.026917333404223125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,128,1,fp8,fp8,0,0.027376001079877216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,0,1,float16,fp8,0,0.025557334224383037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,0,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,128,1,float16,float16,0,0.026517334083716076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,0,1,float16,float16,0,0.025578667720158894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,128,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,128,1,fp8,fp8,0,0.027514666318893433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,128,1,float16,float16,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,0,1,fp8,fp8,0,0.027237333357334137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,0,1,float16,float16,0,0.01951466624935468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,128,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,128,1,fp8,fp8,0,0.019930666933457058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,0,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,0,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,128,1,float16,float16,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,0,1,float16,float16,0,0.01939733326435089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,128,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,128,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,0,1,float16,fp8,0,0.019632000476121902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,128,1,float16,float16,0,0.01894933357834816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,0,1,float16,float16,0,0.018826667219400406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,128,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,128,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,0,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,128,1,float16,float16,0,0.019274666905403137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,0,1,float16,float16,0,0.01893866683046023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,128,1,float16,fp8,0,0.018911999960740406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,0,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,0,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,128,1,float16,float16,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,128,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,128,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,0,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,0,1,float16,float16,0,0.017445333302021027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,128,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,0,1,fp8,fp8,0,0.017738666385412216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,128,1,float16,float16,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,128,1,float16,fp8,0,0.018330667167901993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,0,1,float16,fp8,0,0.017711999515692394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,128,1,float16,float16,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,0,1,float16,float16,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,128,1,float16,fp8,0,0.018837332725524902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,128,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,0,1,float16,fp8,0,0.018730666488409042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,128,1,float16,float16,0,0.017231999586025875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,0,1,fp8,fp8,0,0.017621333400408428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,0,1,float16,float16,0,0.017450666675964992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,128,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,128,1,fp8,fp8,0,0.017605333278576534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,0,1,float16,fp8,0,0.018842666099468868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,0,1,fp8,fp8,0,0.017616000026464462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,128,1,float16,float16,0,0.01747200017174085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,0,1,float16,float16,0,0.017717332889636356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,128,1,float16,fp8,0,0.017551999539136887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,0,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,0,1,float16,fp8,0,0.017829333742459614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,128,1,float16,float16,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,0,1,float16,float16,0,0.017488000293572743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,128,1,float16,fp8,0,0.017845333864291508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,128,1,fp8,fp8,0,0.017610666652520496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,0,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,128,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,128,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,128,1,float16,float16,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,0,1,float16,float16,0,0.017498667041460674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,128,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,128,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,0,1,float16,float16,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,128,1,float16,float16,0,0.017621333400408428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,128,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,128,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,128,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,128,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,128,1,float16,fp8,0,0.01764800027012825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,128,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,0,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,128,1,float16,float16,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,0,1,float16,float16,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,128,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,128,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,128,1,float16,float16,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,0,1,float16,fp8,0,0.017616000026464462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,0,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,128,1,float16,float16,0,0.016373333831628162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,128,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,128,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,0,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,0,1,float16,float16,0,0.14035733540852866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,128,1,float16,float16,0,0.13820800185203552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,128,1,float16,fp8,0,0.1383680005868276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,128,1,fp8,fp8,0,0.1299626628557841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,0,1,float16,fp8,0,0.13838932911554971
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,0,1,fp8,fp8,0,0.12944533427556357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,128,1,float16,float16,0,0.1402346690495809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,0,1,float16,float16,0,0.14032533764839172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,128,1,float16,fp8,0,0.13927466670672098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,128,1,fp8,fp8,0,0.12993599971135458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,0,1,float16,fp8,0,0.13874133427937826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,0,1,fp8,fp8,0,0.13012799620628357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,128,1,float16,float16,0,0.14150399963061014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,128,1,float16,fp8,0,0.14030399918556213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,128,1,fp8,fp8,0,0.1362826625506083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,0,1,float16,float16,0,0.1430346667766571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,0,1,float16,fp8,0,0.14203733205795288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,0,1,fp8,fp8,0,0.13538666566212973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,128,1,float16,float16,0,0.14291200041770935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,0,1,float16,float16,0,0.14272000392278036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,128,1,float16,fp8,0,0.14267200231552124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,128,1,fp8,fp8,0,0.13638400038083395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,0,1,float16,fp8,0,0.1418826679388682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,128,1,float16,float16,0,0.07915199796358745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,0,1,fp8,fp8,0,0.13703999916712442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,0,1,float16,float16,0,0.07876800000667572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,128,1,float16,fp8,0,0.07912533481915791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,128,1,fp8,fp8,0,0.07723199824492137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,0,1,float16,fp8,0,0.07726933558781941
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,0,1,fp8,fp8,0,0.07916266719500224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,0,1,float16,float16,0,0.0765119989713033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,128,1,float16,float16,0,0.07715199887752533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,128,1,float16,fp8,0,0.07628266513347626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,128,1,fp8,fp8,0,0.07239999870459239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,0,1,float16,fp8,0,0.07664533456166585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,0,1,fp8,fp8,0,0.0726560006539027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,128,1,float16,float16,0,0.07677866518497467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,0,1,float16,float16,0,0.07694399853547414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,128,1,float16,fp8,0,0.0765119989713033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,128,1,fp8,fp8,0,0.07252266506354015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,0,1,float16,fp8,0,0.07696533203125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,0,1,fp8,fp8,0,0.07076266904671986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,128,1,float16,float16,0,0.07674666742483775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,0,1,float16,float16,0,0.0768746683994929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,128,1,float16,fp8,0,0.07660800218582153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,128,1,fp8,fp8,0,0.07414400080839793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,0,1,float16,fp8,0,0.07654933134714763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,0,1,fp8,fp8,0,0.07457066575686137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,128,1,float16,float16,0,0.07630933324495952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,0,1,float16,float16,0,0.0766133318344752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,128,1,float16,fp8,0,0.07660266757011414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,128,1,fp8,fp8,0,0.07489599784215291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,0,1,float16,fp8,0,0.07684266567230225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,0,1,fp8,fp8,0,0.0747573326031367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,128,1,float16,float16,0,0.047983999053637184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,0,1,float16,float16,0,0.047925333182017006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,128,1,float16,fp8,0,0.04782933493455251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,128,1,fp8,fp8,0,0.0461760014295578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,0,1,float16,fp8,0,0.0460746685663859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,0,1,fp8,fp8,0,0.04590400060017904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,128,1,float16,float16,0,0.04577599962552389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,0,1,float16,float16,0,0.04580266773700714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,128,1,float16,fp8,0,0.045882667104403176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,128,1,fp8,fp8,0,0.04371733466784159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,0,1,float16,fp8,0,0.045925334095954895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,0,1,fp8,fp8,0,0.04377066592375437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,128,1,float16,float16,0,0.046112000942230225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,0,1,float16,float16,0,0.045893331368764244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,128,1,float16,fp8,0,0.045610666275024414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,128,1,fp8,fp8,0,0.04378133515516917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,0,1,float16,fp8,0,0.0460746685663859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,0,1,fp8,fp8,0,0.04331733286380768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,128,1,float16,float16,0,0.04736533264319102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,0,1,float16,float16,0,0.04796266555786133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,128,1,float16,fp8,0,0.04612799982229868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,128,1,fp8,fp8,0,0.046154667933781944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,0,1,float16,fp8,0,0.04781866570313772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,0,1,fp8,fp8,0,0.04589866598447164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,128,1,float16,float16,0,0.04701866706212362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,0,1,float16,float16,0,0.04775999983151754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,128,1,float16,fp8,0,0.047450666626294456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,0,1,float16,fp8,0,0.04621866842110952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,128,1,fp8,fp8,0,0.04585599899291992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,0,1,fp8,fp8,0,0.04550399879614512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,128,1,float16,float16,0,0.03128000100453695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,128,1,float16,fp8,0,0.031194667021433514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,0,1,float16,float16,0,0.031152000029881794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,128,1,fp8,fp8,0,0.029690665503342945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,0,1,float16,fp8,0,0.03156266609827677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,0,1,fp8,fp8,0,0.02940266579389572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,128,1,float16,float16,0,0.029616000751654308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,128,1,float16,fp8,0,0.02956266701221466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,0,1,float16,float16,0,0.03142933299144109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,128,1,fp8,fp8,0,0.029311999678611755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,0,1,float16,fp8,0,0.031258667508761086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,0,1,fp8,fp8,0,0.029648000995318096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,128,1,float16,float16,0,0.0315733328461647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,0,1,float16,float16,0,0.031530665854612984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,128,1,float16,fp8,0,0.03160000095764796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,128,1,fp8,fp8,0,0.02958933264017105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,0,1,fp8,fp8,0,0.02940266579389572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,128,1,float16,float16,0,0.031114667654037476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,0,1,float16,float16,0,0.031189332405726116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,0,1,float16,fp8,0,0.031221332649389904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,128,1,float16,fp8,0,0.03125333289305369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,128,1,fp8,fp8,0,0.029509333272775013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,0,1,fp8,fp8,0,0.029605334003766377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,0,1,float16,fp8,0,0.03127466638882955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,128,1,float16,float16,0,0.03133866687615713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,0,1,float16,float16,0,0.031541332602500916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,128,1,fp8,fp8,0,0.029290666182835896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,128,1,float16,fp8,0,0.03143466760714849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,0,1,float16,fp8,0,0.031343999008337654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,0,1,float16,float16,0,0.021733333667119343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,0,1,fp8,fp8,0,0.03133866687615713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,128,1,float16,float16,0,0.021045332153638203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,128,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,128,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,0,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,128,1,float16,float16,0,0.021365332106749218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,0,1,float16,float16,0,0.022842665513356526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,128,1,float16,fp8,0,0.021509334444999695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,128,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,0,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,128,1,float16,float16,0,0.021477334201335907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,0,1,float16,float16,0,0.021327999730904896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,128,1,float16,fp8,0,0.021509334444999695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,128,1,fp8,fp8,0,0.021407999098300934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,0,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,0,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,128,1,float16,float16,0,0.021402666966120403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,128,1,float16,fp8,0,0.02163200080394745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,128,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,0,1,float16,fp8,0,0.022831998765468597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,0,1,fp8,fp8,0,0.02176533391078313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,128,1,float16,float16,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,0,1,float16,float16,0,0.021781332790851593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,128,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,128,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,0,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,0,1,fp8,fp8,0,0.021664001047611237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,128,1,float16,float16,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,128,1,float16,fp8,0,0.018842666099468868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,128,1,fp8,fp8,0,0.017504000415404636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,0,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,128,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,128,1,float16,fp8,0,0.018751999984184902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,128,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,128,1,float16,float16,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,0,1,float16,float16,0,0.017423999806245167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,128,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,128,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,128,1,float16,float16,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,128,1,float16,fp8,0,0.017727999637524288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,128,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,0,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,128,1,float16,float16,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,128,1,float16,fp8,0,0.017525333911180496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,128,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,128,1,float16,float16,0,0.015061333775520325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,0,1,float16,float16,0,0.016389333953460056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,128,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,0,1,fp8,fp8,0,0.016085332880417507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,0,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,128,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,128,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,0,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,128,1,fp8,fp8,0,0.015498666713635126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,0,1,float16,float16,0,0.01602666700879733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,128,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,128,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,128,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,128,1,fp8,fp8,0,0.016714667280515034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,0,1,float16,fp8,0,0.015413332730531693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,0,1,fp8,fp8,0,0.01659199967980385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,128,1,float16,fp8,0,0.018058666338523228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,128,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,0,1,float16,float16,0,0.01886933296918869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,128,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,128,1,fp8,fp8,0,0.015568000574906668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,0,1,float16,fp8,0,0.016645333419243496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,0,1,float16,float16,0,0.015743999431530636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,128,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,0,1,float16,fp8,0,0.017562666287024815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,128,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,128,1,float16,fp8,0,0.01802666609485944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,128,1,float16,float16,0,0.01571200042963028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,128,1,float16,fp8,0,0.018250666558742523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,128,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,0,1,float16,float16,0,0.1095360020796458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,128,1,float16,fp8,0,0.10954667131106059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,128,1,float16,float16,0,0.11192533373832703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,128,1,fp8,fp8,0,0.10113599896430969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,0,1,float16,fp8,0,0.10949333508809407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,0,1,fp8,fp8,0,0.10168533523877461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,128,1,float16,float16,0,0.10975466171900432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,0,1,float16,float16,0,0.11149332920710246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,128,1,float16,fp8,0,0.11225066582361858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,128,1,fp8,fp8,0,0.10170666376749675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,0,1,float16,fp8,0,0.11146133144696553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,0,1,fp8,fp8,0,0.10285866260528564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,128,1,float16,float16,0,0.11173866192499797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,0,1,float16,float16,0,0.11155733466148376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,128,1,float16,fp8,0,0.11188800136248271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,128,1,fp8,fp8,0,0.10314666231473286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,0,1,float16,fp8,0,0.11146666606267293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,0,1,fp8,fp8,0,0.10513066252072652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,128,1,float16,float16,0,0.10994133353233337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,0,1,float16,float16,0,0.11176000038782756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,128,1,float16,fp8,0,0.11137066284815471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,128,1,fp8,fp8,0,0.10511466860771179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,0,1,float16,fp8,0,0.11161067088445027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,0,1,fp8,fp8,0,0.10549333691596985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,128,1,float16,float16,0,0.06464000046253204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,0,1,float16,float16,0,0.06413333117961884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,128,1,float16,fp8,0,0.0647680014371872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,128,1,fp8,fp8,0,0.0625439981619517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,0,1,float16,fp8,0,0.06439466774463654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,0,1,fp8,fp8,0,0.06282666822274525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,128,1,float16,float16,0,0.0645066648721695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,0,1,float16,float16,0,0.06406400104363759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,128,1,float16,fp8,0,0.0642080008983612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,0,1,float16,fp8,0,0.0647680014371872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,0,1,fp8,fp8,0,0.05860800047715505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,128,1,float16,float16,0,0.06442666550477345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,128,1,fp8,fp8,0,0.06088533500830332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,0,1,float16,float16,0,0.0644160012404124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,128,1,float16,fp8,0,0.06420266628265381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,128,1,fp8,fp8,0,0.05994133154551188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,0,1,float16,fp8,0,0.06467733283837636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,0,1,fp8,fp8,0,0.06005866825580597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,128,1,float16,float16,0,0.06459199885527293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,0,1,float16,float16,0,0.0643039991458257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,128,1,float16,fp8,0,0.06461866696675618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,128,1,fp8,fp8,0,0.06076799829800924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,0,1,float16,fp8,0,0.06446933249632518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,0,1,fp8,fp8,0,0.0620959997177124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,128,1,float16,float16,0,0.06447466711203258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,0,1,float16,float16,0,0.06435200075308482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,128,1,float16,fp8,0,0.06438399851322174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,128,1,fp8,fp8,0,0.06230400005976359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,0,1,fp8,fp8,0,0.06186666587988535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,128,1,float16,float16,0,0.037834666669368744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,0,1,float16,fp8,0,0.06469333171844482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,0,1,float16,float16,0,0.03947199881076813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,128,1,float16,fp8,0,0.03949866692225138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,128,1,fp8,fp8,0,0.03752533346414566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,0,1,fp8,fp8,0,0.03755199909210205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,0,1,float16,fp8,0,0.0395359992980957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,128,1,float16,float16,0,0.03972266614437103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,128,1,float16,fp8,0,0.03966933240493139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,0,1,float16,float16,0,0.03770133356253306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,128,1,fp8,fp8,0,0.03578133384386698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,0,1,float16,fp8,0,0.03940266619126002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,0,1,fp8,fp8,0,0.035674666364987694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,128,1,float16,float16,0,0.03765333443880081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,0,1,float16,float16,0,0.037530665596326195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,128,1,float16,fp8,0,0.03748800108830134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,128,1,fp8,fp8,0,0.0358240008354187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,0,1,float16,fp8,0,0.037632000943024956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,0,1,fp8,fp8,0,0.035690667728583016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,128,1,float16,float16,0,0.0393653338154157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,0,1,float16,float16,0,0.039493332306543984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,128,1,float16,fp8,0,0.03958400090535482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,128,1,fp8,fp8,0,0.037717332442601524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,0,1,float16,fp8,0,0.03965333352486292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,0,1,fp8,fp8,0,0.03772799919048945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,128,1,float16,float16,0,0.03944533318281174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,0,1,float16,float16,0,0.03957333415746689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,128,1,float16,fp8,0,0.037658666570981346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,128,1,fp8,fp8,0,0.037903999288876854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,0,1,float16,fp8,0,0.03955200066169103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,0,1,fp8,fp8,0,0.03756266583998998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,128,1,float16,float16,0,0.027776000400384266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,0,1,float16,float16,0,0.027162666122118633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,128,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,128,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,0,1,float16,fp8,0,0.027087998886903126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,0,1,fp8,fp8,0,0.027232001225153606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,128,1,float16,float16,0,0.025813333690166473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,0,1,float16,float16,0,0.025424001117547352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,128,1,float16,fp8,0,0.027258666853109997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,128,1,fp8,fp8,0,0.02569599946339925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,0,1,float16,fp8,0,0.026613332331180573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,0,1,fp8,fp8,0,0.025424001117547352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,128,1,float16,float16,0,0.02714666724205017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,0,1,float16,float16,0,0.02733866622050603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,128,1,float16,fp8,0,0.026890667776266735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,128,1,fp8,fp8,0,0.025610665480295818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,0,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,0,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,128,1,float16,float16,0,0.026880001028378803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,0,1,float16,float16,0,0.027301333844661713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,128,1,float16,fp8,0,0.026314665873845417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,128,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,0,1,float16,fp8,0,0.02741866558790207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,0,1,fp8,fp8,0,0.025413334369659424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,128,1,float16,float16,0,0.027327999472618103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,0,1,float16,float16,0,0.027215999861558277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,128,1,float16,fp8,0,0.027317332724730175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,128,1,fp8,fp8,0,0.02716800073782603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,0,1,float16,fp8,0,0.027109332382678986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,0,1,fp8,fp8,0,0.027242665489514668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,128,1,float16,float16,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,0,1,float16,float16,0,0.021018666525681812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,128,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,128,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,0,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,0,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,128,1,float16,float16,0,0.0210506667693456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,0,1,float16,float16,0,0.019695999721686046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,128,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,128,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,0,1,float16,fp8,0,0.019386666516462963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,128,1,float16,float16,0,0.019402666638294857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,0,1,float16,float16,0,0.019434666881958645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,128,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,128,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,0,1,float16,fp8,0,0.019487999379634857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,0,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,128,1,float16,float16,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,0,1,float16,float16,0,0.021216000119845074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,128,1,float16,fp8,0,0.019445333629846573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,128,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,0,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,128,1,float16,float16,0,0.021301334102948506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,128,1,float16,fp8,0,0.019434666881958645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,0,1,float16,float16,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,128,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,0,1,float16,fp8,0,0.019386666516462963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,0,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,128,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,128,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,128,1,float16,fp8,0,0.016783999900023144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,128,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,128,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,0,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,128,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,128,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,128,1,float16,fp8,0,0.01782400036851565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,128,1,float16,float16,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,128,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,0,1,float16,float16,0,0.015461333096027374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,0,1,float16,fp8,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,128,1,float16,float16,0,0.01553600033124288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,128,1,float16,fp8,0,0.0174346665541331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,0,1,fp8,fp8,0,0.01534933348496755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,128,1,float16,float16,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,128,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,0,1,fp8,fp8,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,128,1,float16,float16,0,0.015423999478419622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,128,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,128,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,0,1,float16,fp8,0,0.015487999965747198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,128,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,0,1,float16,float16,0,0.015376000354687372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,128,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,0,1,fp8,fp8,0,0.016010666886965435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,128,1,float16,float16,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,128,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,128,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,0,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,128,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,128,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,0,1,float16,fp8,0,0.016149333367745083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,128,1,float16,float16,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,0,1,fp8,fp8,0,0.01543466622630755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,128,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,128,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,128,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,128,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,0,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,128,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,128,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,128,1,float16,float16,0,0.015050667027632395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,128,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,128,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,128,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,0,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,128,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,128,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,0,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,128,1,float16,float16,0,0.09922132889429729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,0,1,float16,float16,0,0.09921600421269734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,128,1,float16,fp8,0,0.09940800070762634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,128,1,fp8,fp8,0,0.09103467067082723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,0,1,float16,fp8,0,0.09813867012659709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,0,1,fp8,fp8,0,0.09116799632708232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,128,1,float16,float16,0,0.09803199768066406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,128,1,float16,fp8,0,0.09817066788673401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,0,1,float16,float16,0,0.09970133503278096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,128,1,fp8,fp8,0,0.09080533186594646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,0,1,float16,fp8,0,0.09726400176684062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,128,1,float16,float16,0,0.09916800260543823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,0,1,fp8,fp8,0,0.09130133191744487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,0,1,float16,float16,0,0.09909866253534953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,128,1,float16,fp8,0,0.0990133285522461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,128,1,fp8,fp8,0,0.09378666679064433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,0,1,float16,fp8,0,0.09954133629798889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,0,1,fp8,fp8,0,0.09296000003814697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,128,1,float16,float16,0,0.09975999593734741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,0,1,float16,float16,0,0.09941866993904114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,128,1,float16,fp8,0,0.09915199875831604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,128,1,fp8,fp8,0,0.09325333436330159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,0,1,float16,fp8,0,0.09927999973297119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,0,1,fp8,fp8,0,0.0928053359190623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,128,1,float16,float16,0,0.05640000104904175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,0,1,float16,float16,0,0.05810666580994924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,128,1,float16,fp8,0,0.05611733098824819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,128,1,fp8,fp8,0,0.054383998115857445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,0,1,float16,fp8,0,0.056320001681645714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,0,1,fp8,fp8,0,0.05397866666316986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,128,1,float16,float16,0,0.056474665800730385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,0,1,float16,float16,0,0.05608533322811127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,128,1,float16,fp8,0,0.05598933498064677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,128,1,fp8,fp8,0,0.05392000079154968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,0,1,float16,fp8,0,0.05622933308283488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,0,1,fp8,fp8,0,0.05562133093674978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,128,1,float16,float16,0,0.05614933371543884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,0,1,float16,float16,0,0.05612266560395559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,128,1,float16,fp8,0,0.056362668673197426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,128,1,fp8,fp8,0,0.05230399966239929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,0,1,float16,fp8,0,0.05593066910902659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,0,1,fp8,fp8,0,0.05389333268006643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,128,1,float16,float16,0,0.05606399973233541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,128,1,float16,fp8,0,0.057333335280418396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,0,1,float16,float16,0,0.056159997979799904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,128,1,fp8,fp8,0,0.054058666030565895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,0,1,float16,fp8,0,0.05603733162085215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,0,1,fp8,fp8,0,0.05392000079154968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,0,1,float16,float16,0,0.056176001826922096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,128,1,float16,float16,0,0.05823466678460439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,128,1,float16,fp8,0,0.05619200070699056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,128,1,fp8,fp8,0,0.05365333457787832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,0,1,float16,fp8,0,0.05811200042565664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,128,1,float16,float16,0,0.035391998787721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,0,1,float16,float16,0,0.035455999275048576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,0,1,fp8,fp8,0,0.054101333022117615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,128,1,float16,fp8,0,0.035605333745479584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,128,1,fp8,fp8,0,0.033674667278925575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,0,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,0,1,fp8,fp8,0,0.03368533402681351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,128,1,float16,float16,0,0.0355679988861084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,0,1,float16,float16,0,0.03329066683848699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,128,1,float16,fp8,0,0.03537066777547201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,128,1,fp8,fp8,0,0.03322133421897888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,0,1,float16,fp8,0,0.03562133262554804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,0,1,fp8,fp8,0,0.03161066770553589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,128,1,float16,float16,0,0.03551466763019562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,0,1,float16,float16,0,0.03356799980004629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,128,1,float16,fp8,0,0.03417066733042399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,128,1,fp8,fp8,0,0.031504000226656594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,0,1,float16,fp8,0,0.03374933451414108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,0,1,fp8,fp8,0,0.033013333876927696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,0,1,float16,float16,0,0.0337119996547699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,128,1,float16,float16,0,0.03570666660865148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,128,1,float16,fp8,0,0.033904001116752625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,128,1,fp8,fp8,0,0.033413333197434746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,0,1,float16,fp8,0,0.03555733213822047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,128,1,float16,float16,0,0.03364266703526179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,0,1,fp8,fp8,0,0.03400533397992452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,0,1,float16,float16,0,0.035802667339642845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,128,1,float16,fp8,0,0.03549333413441976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,128,1,fp8,fp8,0,0.033301333586374916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,0,1,float16,fp8,0,0.03350933392842611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,0,1,fp8,fp8,0,0.03330666571855545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,128,1,float16,float16,0,0.02510400116443634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,0,1,float16,float16,0,0.025439999997615814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,128,1,float16,fp8,0,0.025360000630219776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,128,1,fp8,fp8,0,0.02385599911212921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,0,1,float16,fp8,0,0.02534399926662445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,0,1,fp8,fp8,0,0.02513066679239273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,128,1,float16,float16,0,0.02554133286078771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,0,1,float16,float16,0,0.025194667279720306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,128,1,float16,fp8,0,0.02508266766866048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,128,1,fp8,fp8,0,0.023365333676338196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,0,1,float16,fp8,0,0.02565866708755493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,0,1,fp8,fp8,0,0.023557332654794056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,128,1,float16,float16,0,0.025173333783944447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,0,1,float16,float16,0,0.025631998976071674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,128,1,float16,fp8,0,0.025749333202838898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,128,1,fp8,fp8,0,0.023423999547958374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,0,1,float16,fp8,0,0.025653332471847534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,0,1,fp8,fp8,0,0.02526933451493581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,128,1,float16,float16,0,0.02532266577084859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,0,1,float16,float16,0,0.025263999899228413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,128,1,float16,fp8,0,0.025349333882331848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,128,1,fp8,fp8,0,0.025413334369659424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,0,1,float16,fp8,0,0.025013332565625507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,0,1,fp8,fp8,0,0.025231999655564625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,128,1,float16,float16,0,0.02516799916823705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,0,1,float16,float16,0,0.027077332139015198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,128,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,128,1,fp8,fp8,0,0.023520000278949738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,0,1,float16,fp8,0,0.025349333882331848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,128,1,float16,float16,0,0.01918399954835574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,0,1,fp8,fp8,0,0.02362666775782903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,128,1,float16,fp8,0,0.01937599976857503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,128,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,0,1,float16,fp8,0,0.01987733319401741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,128,1,float16,float16,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,0,1,float16,float16,0,0.01960533360640208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,0,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,128,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,128,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,0,1,fp8,fp8,0,0.01878400022784869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,128,1,float16,float16,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,0,1,float16,float16,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,128,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,128,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,0,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,0,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,0,1,float16,float16,0,0.018863999595244724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,0,1,float16,fp8,0,0.019861333072185516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,128,1,float16,fp8,0,0.019461333751678467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,128,1,float16,float16,0,0.01904533306757609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,0,1,float16,float16,0,0.019066666563351948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,128,1,float16,fp8,0,0.019695999721686046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,0,1,float16,fp8,0,0.019754666835069656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,128,1,float16,float16,0,0.016794666647911072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,128,1,float16,float16,0,0.016794666647911072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,128,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,128,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,128,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,128,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,0,1,float16,float16,0,0.01887999971707662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,128,1,fp8,fp8,0,0.016293333222468693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,128,1,float16,float16,0,0.014991999914248785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,0,1,float16,float16,0,0.016250666230916977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,128,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,128,1,float16,float16,0,0.014991999914248785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,128,1,float16,fp8,0,0.01661866654952367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,0,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,128,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,0,1,float16,fp8,0,0.016069332758585613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,0,1,float16,float16,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,128,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,128,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,128,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,128,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,128,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,128,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,0,1,fp8,fp8,0,0.01752000053723653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,128,1,fp8,fp8,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,0,1,float16,float16,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,0,1,float16,fp8,0,0.015664000064134598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,128,1,float16,float16,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,128,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,0,1,float16,float16,0,0.01581866666674614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,128,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,128,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,128,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,128,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,0,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,128,1,float16,float16,0,0.016778666526079178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,128,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,128,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,0,1,float16,float16,0,0.016063999384641647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,128,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,0,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,0,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,0,1,fp8,fp8,0,0.015439999600251516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,0,0.08520533641179402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,0,0.08473599950472514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,0,0.08494399984677632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,64,128,1,fp8,fp8,0,0.07861333092053731
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,0,0.08504000306129456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,64,0,1,fp8,fp8,0,0.07887466748555501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,0,0.08457066615422566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,0,0.08468266328175862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,0,0.08506666620572408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,64,128,1,fp8,fp8,0,0.07884799937407176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,0,0.08480000495910645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,64,0,1,fp8,fp8,0,0.07871466875076294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,0,0.08504000306129456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,0,0.08476799726486206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,0,0.08481599887212117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,64,128,1,fp8,fp8,0,0.07860266665617625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,0,0.08502399921417236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,64,0,1,fp8,fp8,0,0.07876266539096832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,0,0.08552533388137817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,0,0.08522666494051616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,64,128,1,fp8,fp8,0,0.07906133433183034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,0,0.08628267049789429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,0,0.08497599760691325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,64,0,1,fp8,fp8,0,0.07898666461308797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,64,128,1,float16,float16,0,0.050240000089009605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,0,0.05011733373006185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,64,128,1,float16,fp8,0,0.049679999550183616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,64,128,1,fp8,fp8,0,0.047770669062932335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,0,0.05000533163547516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,64,0,1,fp8,fp8,0,0.04631466666857401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,0,0.04995200037956238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,0,0.05003199974695841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,0,0.04980800052483877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,64,128,1,fp8,fp8,0,0.04574933151404063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,0,0.05004799862702688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,64,0,1,fp8,fp8,0,0.04597333570321401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,0,0.050053333242734276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,0,0.04979733129342397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,0,0.05051200091838837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,64,128,1,fp8,fp8,0,0.04789333542188009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,0,0.050111999114354454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,64,0,1,fp8,fp8,0,0.04764266808827718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,0,0.05009066561857859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,0,0.0498879998922348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,0,0.04986133178075155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,64,128,1,fp8,fp8,0,0.04788800080617269
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,0,0.05026666820049286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,64,0,1,fp8,fp8,0,0.04603200157483419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,0,0.04974400003751119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,0,0.05007466673851013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,0,0.05020266771316528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,64,128,1,fp8,fp8,0,0.04603200157483419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,0,0.049728001157442726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,64,0,1,fp8,fp8,0,0.04602666695912679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,64,128,1,float16,float16,0,0.031925333042939506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,0,0.03130666663249334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,64,128,1,float16,fp8,0,0.03338133295377096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,64,128,1,fp8,fp8,0,0.03133866687615713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,0,0.032399999598662056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,64,0,1,fp8,fp8,0,0.03142400085926056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,0,0.031386665999889374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,0,0.03161599983771642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,0,0.03350933392842611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,64,128,1,fp8,fp8,0,0.029685333371162415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,0,0.03145066648721695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,64,0,1,fp8,fp8,0,0.03107733279466629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,0,0.03133866687615713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,0,0.03164266546567281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,0,0.03169599920511246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,64,128,1,fp8,fp8,0,0.03018666555484136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,0,0.031301334500312805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,64,0,1,fp8,fp8,0,0.029882666965325672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,0,0.03310399999221166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,0,0.0316746657093366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,0,0.03173866619666418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,64,128,1,fp8,fp8,0,0.03148266673088074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,0,0.03160533308982849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,64,0,1,fp8,fp8,0,0.029578665892283123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,0,0.03315199911594391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,0,0.031632001201311745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,0,0.03340800106525421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,64,128,1,fp8,fp8,0,0.031583999594052635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,0,0.03334933271010717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,64,128,1,float16,float16,0,0.02311466634273529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,64,0,1,fp8,fp8,0,0.031311998764673867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,64,128,1,float16,fp8,0,0.023472001155217487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,64,128,1,fp8,fp8,0,0.023045333723227184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,0,0.023498666783173878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,64,0,1,fp8,fp8,0,0.02310933421055476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,0,0.0236160010099411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,0,0.024698667228221893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,0,0.025205334027608235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,64,128,1,fp8,fp8,0,0.023733332753181458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,64,0,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,0,0.024906667570273083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,0,0.025199999411900837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,0,0.02515200028816859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,64,128,1,fp8,fp8,0,0.023498666783173878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,0,0.025221332907676697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,64,0,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,0,0.025125332176685333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,0,0.0233599990606308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,64,128,1,fp8,fp8,0,0.02311466634273529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,64,0,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,0,0.02367466688156128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,0,0.023365333676338196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,0,0.025386666258176167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,64,128,1,fp8,fp8,0,0.02330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,0,0.02345066765944163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,64,128,1,float16,float16,0,0.019167999426523846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,64,0,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,64,128,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,64,128,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,64,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,0,0.019930666933457058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,0,0.019354666272799175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,0,0.019354666272799175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,64,128,1,fp8,fp8,0,0.019909333437681198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,64,0,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,0,0.019343999524911244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,64,128,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,0,0.020506666352351505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,64,0,1,fp8,fp8,0,0.01945066700379054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,0,0.018981333822011948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,64,128,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,64,0,1,fp8,fp8,0,0.018805333723624546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,0,0.020768000433842342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,64,128,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,64,0,1,fp8,fp8,0,0.019839999576409657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,64,0,1,fp8,fp8,0,0.016783999900023144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,0,0.015354666858911514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,64,128,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,0,0.017557332913080852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,64,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,64,128,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,64,128,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,64,128,1,float16,float16,0,0.015354666858911514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,64,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,64,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,64,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,0,0.015360000232855478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,0,0.016623999923467636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,64,128,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,64,0,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,64,128,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,64,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,64,128,1,float16,float16,0,0.014906667172908783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,0,0.015509333461523056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,64,128,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,64,128,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,64,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,0,0.014815999815861383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,64,128,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,64,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,0,0.015386667102575302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,64,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,64,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,0,0.014858666807413101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,64,128,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,64,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,64,128,1,float16,fp8,0,0.01764800027012825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,64,0,1,fp8,fp8,0,0.015461333096027374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,0,0.014869333555301031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,64,128,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,0,0.015541333705186844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,64,0,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,0,0.015685333559910457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,64,0,1,fp8,fp8,0,0.01651200031240781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,0,0.015722667177518208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,0,0.015834666788578033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,64,0,1,fp8,fp8,0,0.016250666230916977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,64,128,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,64,0,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,128,1,float16,float16,0,0.6900746822357178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,128,1,float16,fp8,0,0.6970826784769694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,128,1,fp8,fp8,0,0.6351679960886637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,0,1,float16,float16,0,4.1659361521403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,0,1,float16,fp8,0,4.23799991607666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,128,1,float16,float16,0,0.7019466559092203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,0,1,fp8,fp8,0,3.7685438791910806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,128,1,fp8,fp8,0,0.6503093242645264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,128,1,float16,fp8,0,0.7258613109588623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,128,1,float16,float16,0,0.720853328704834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,0,1,float16,float16,0,4.249253273010254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,0,1,float16,fp8,0,4.131946563720703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,128,1,float16,fp8,0,0.7271626790364584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,0,1,fp8,fp8,0,3.7819948196411133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,128,1,fp8,fp8,0,0.675322691599528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,0,1,float16,float16,0,4.132757186889648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,128,1,float16,float16,0,0.41259201367696124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,128,1,float16,fp8,0,0.4214506546656291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,128,1,fp8,fp8,0,0.393120010693868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,0,1,float16,float16,0,2.156709353129069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,0,1,fp8,fp8,0,3.8033812840779624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,0,1,float16,fp8,0,4.176981290181478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,128,1,float16,float16,0,0.3689333200454712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,128,1,float16,fp8,0,0.3731253147125244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,0,1,float16,fp8,0,2.164720058441162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,0,1,fp8,fp8,0,2.0088000297546387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,128,1,fp8,fp8,0,0.3426613410313924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,0,1,float16,float16,0,2.127120018005371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,128,1,float16,float16,0,0.37358399232228595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,128,1,float16,fp8,0,0.3779413302739461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,0,1,float16,fp8,0,2.1107306480407715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,0,1,fp8,fp8,0,1.9562026659647624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,128,1,fp8,fp8,0,0.34707732995351154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,0,1,float16,float16,0,2.111482620239258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,128,1,float16,float16,0,0.381930669148763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,128,1,float16,fp8,0,0.3864693244298299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,0,1,float16,fp8,0,2.114741325378418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,0,1,fp8,fp8,0,1.9612852732340496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,128,1,fp8,fp8,0,0.35707199573516846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,128,1,float16,float16,0,0.23501867055892944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,0,1,float16,float16,0,2.136693318684896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,128,1,float16,fp8,0,0.2407146692276001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,128,1,fp8,fp8,0,0.22634132703145346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,0,1,float16,fp8,0,2.1268320083618164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,0,1,float16,float16,0,1.1578506628672283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,0,1,fp8,fp8,0,1.9699573516845703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,128,1,float16,float16,0,0.20986666282018027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,128,1,float16,fp8,0,0.21191465854644775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,0,1,fp8,fp8,0,1.079909324645996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,0,1,float16,fp8,0,1.1614346504211426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,0,1,float16,float16,0,1.129647970199585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,128,1,fp8,fp8,0,0.1995946764945984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,128,1,float16,float16,0,0.2137599984804789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,128,1,float16,fp8,0,0.21534399191538492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,0,1,fp8,fp8,0,1.0522720019022624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,0,1,float16,fp8,0,1.1322613557179768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,0,1,float16,float16,0,1.1308213075002034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,128,1,fp8,fp8,0,0.20152533054351807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,128,1,float16,float16,0,0.21972266832987467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,0,1,float16,fp8,0,1.1345973014831543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,0,1,fp8,fp8,0,1.0571946303049724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,128,1,float16,fp8,0,0.22214933236440024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,0,1,float16,float16,0,1.1408320267995198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,128,1,fp8,fp8,0,0.2085919976234436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,128,1,float16,float16,0,0.1686720053354899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,0,1,float16,fp8,0,1.14246932665507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,128,1,float16,fp8,0,0.16892266273498535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,0,1,fp8,fp8,0,1.061018705368042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,0,1,float16,float16,0,0.6824159622192383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,128,1,fp8,fp8,0,0.15849600235621134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,128,1,float16,float16,0,0.165610671043396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,0,1,fp8,fp8,0,0.6341813405354818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,0,1,float16,fp8,0,0.6830613613128662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,128,1,float16,fp8,0,0.16506133476893106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,128,1,fp8,fp8,0,0.1545973320802053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,0,1,float16,float16,0,0.6805706818898519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,0,1,fp8,fp8,0,0.6313226620356241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,128,1,float16,float16,0,0.16476800044377646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,0,1,float16,fp8,0,0.6776266892751058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,128,1,float16,fp8,0,0.16511999567349753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,128,1,fp8,fp8,0,0.1544319987297058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,0,1,float16,float16,0,0.6784053643544515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,0,1,fp8,fp8,0,0.63155198097229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,0,1,float16,fp8,0,0.6792799631754557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,128,1,float16,float16,0,0.1660533348719279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,128,1,fp8,fp8,0,0.15661866466204324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,128,1,float16,fp8,0,0.16639467080434164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,0,1,float16,float16,0,0.6791146596272787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,0,1,fp8,fp8,0,0.632149338722229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,0,1,float16,fp8,0,0.678704023361206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,128,1,float16,float16,0,0.520522673924764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,128,1,float16,fp8,0,0.5248533487319946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,128,1,fp8,fp8,0,0.4771413405736287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,0,1,float16,float16,0,2.4217653274536133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,128,1,float16,float16,0,0.5288373231887817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,0,1,float16,fp8,0,2.42523193359375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,0,1,fp8,fp8,0,2.2413439750671387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,128,1,float16,fp8,0,0.5331519842147827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,128,1,fp8,fp8,0,0.4868533213933309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,128,1,float16,float16,0,0.5402026573816935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,0,1,float16,float16,0,2.4265333811442056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,128,1,float16,fp8,0,0.5456746816635132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,0,1,float16,fp8,0,2.443461259206136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,0,1,fp8,fp8,0,2.253215948740641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,128,1,fp8,fp8,0,0.5033119916915894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,128,1,float16,float16,0,0.3145599961280823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,0,1,float16,float16,0,2.44868803024292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,128,1,float16,fp8,0,0.32226133346557617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,0,1,fp8,fp8,0,2.268821398417155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,0,1,float16,fp8,0,2.501258691151937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,128,1,fp8,fp8,0,0.2998613317807515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,0,1,float16,float16,0,1.3084746996561687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,128,1,float16,float16,0,0.28035199642181396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,128,1,float16,fp8,0,0.28353599707285565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,0,1,float16,fp8,0,1.3150560061136882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,0,1,fp8,fp8,0,1.2190399964650471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,0,1,float16,float16,0,1.269546667734782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,128,1,fp8,fp8,0,0.26308266321818036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,128,1,float16,float16,0,0.2836266756057739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,128,1,float16,fp8,0,0.2866986592610677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,0,1,float16,fp8,0,1.272858699162801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,0,1,fp8,fp8,0,1.179866631825765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,0,1,float16,float16,0,1.2716960112253826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,128,1,fp8,fp8,0,0.26549333333969116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,128,1,float16,float16,0,0.2917813261349996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,0,1,fp8,fp8,0,1.1854026317596436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,0,1,float16,fp8,0,1.277018706003825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,128,1,float16,fp8,0,0.2948373357454936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,128,1,fp8,fp8,0,0.27342400948206586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,0,1,float16,float16,0,1.2798613707224529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,128,1,float16,float16,0,0.17882132530212402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,128,1,float16,fp8,0,0.18331199884414673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,0,1,float16,float16,0,0.7195306619008383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,0,1,float16,fp8,0,1.28657062848409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,0,1,fp8,fp8,0,1.1913599967956543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,128,1,fp8,fp8,0,0.17294400930404663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,128,1,float16,float16,0,0.15853866934776306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,0,1,float16,fp8,0,0.7242933114369711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,0,1,fp8,fp8,0,0.6719840367635092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,128,1,float16,fp8,0,0.1607146660486857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,0,1,float16,float16,0,0.6944853464762369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,128,1,fp8,fp8,0,0.14969066778818765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,128,1,float16,float16,0,0.16084800163904825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,0,1,fp8,fp8,0,0.645797332127889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,0,1,float16,fp8,0,0.6950080394744873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,128,1,float16,fp8,0,0.1612213353315989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,128,1,fp8,fp8,0,0.15247467160224915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,0,1,float16,float16,0,0.698533296585083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,128,1,float16,float16,0,0.16473600268363953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,0,1,float16,fp8,0,0.6983679930369059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,0,1,fp8,fp8,0,0.6490559975306193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,128,1,float16,fp8,0,0.1668000022570292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,0,1,float16,float16,0,0.7016692956288656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,128,1,fp8,fp8,0,0.15954132874806723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,128,1,float16,float16,0,0.12574399511019388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,0,1,float16,float16,0,0.44015467166900635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,0,1,float16,fp8,0,0.7030133406321207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,128,1,float16,fp8,0,0.12638933459917703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,0,1,fp8,fp8,0,0.656490683555603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,128,1,fp8,fp8,0,0.1200213332970937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,128,1,float16,float16,0,0.12567466497421265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,0,1,float16,fp8,0,0.43856000900268555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,0,1,fp8,fp8,0,0.4078133503595988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,128,1,float16,fp8,0,0.1239359974861145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,0,1,float16,float16,0,0.43742398420969647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,128,1,fp8,fp8,0,0.11938666303952535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,0,1,float16,fp8,0,0.43644265333811444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,128,1,float16,float16,0,0.1237493356068929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,0,1,fp8,fp8,0,0.4049439827601115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,128,1,float16,fp8,0,0.12396267056465149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,128,1,fp8,fp8,0,0.11954133709271748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,0,1,float16,float16,0,0.4361013174057007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,128,1,float16,float16,0,0.12385599811871846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,0,1,float16,fp8,0,0.437391996383667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,0,1,fp8,fp8,0,0.4045333464940389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,128,1,float16,fp8,0,0.12577066818873087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,128,1,fp8,fp8,0,0.11787199974060059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,0,1,float16,float16,0,0.4365546703338623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,0,1,fp8,fp8,0,0.4046773513158162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,0,1,float16,fp8,0,0.43675732612609863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,128,1,float16,float16,0,0.4360479911168416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,128,1,float16,fp8,0,0.4392533302307129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,128,1,fp8,fp8,0,0.40089599291483563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,0,1,float16,float16,0,1.7556586265563965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,0,1,float16,fp8,0,1.7628533045450847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,128,1,float16,float16,0,0.44137601057688397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,0,1,fp8,fp8,0,1.626981258392334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,128,1,float16,fp8,0,0.4458986520767212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,128,1,fp8,fp8,0,0.40836799144744873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,0,1,float16,float16,0,1.7643839518229167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,0,1,fp8,fp8,0,1.6331253051757812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,128,1,float16,float16,0,0.4522720177968343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,0,1,float16,fp8,0,1.765936056772868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,128,1,float16,fp8,0,0.45654932657877606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,128,1,fp8,fp8,0,0.4196586608886719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,0,1,float16,float16,0,1.7771520614624023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,128,1,float16,float16,0,0.2621813416481018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,128,1,float16,fp8,0,0.26924266417821247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,0,1,float16,fp8,0,1.7795039812723796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,128,1,fp8,fp8,0,0.25165865818659466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,0,1,float16,float16,0,0.9610453446706136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,0,1,fp8,fp8,0,1.6460639635721843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,128,1,float16,float16,0,0.23227733373641968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,0,1,float16,fp8,0,0.9663253625233968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,128,1,float16,fp8,0,0.23438400030136108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,0,1,fp8,fp8,0,0.8957653045654297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,128,1,fp8,fp8,0,0.2201226751009623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,0,1,float16,float16,0,0.9273119767506918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,128,1,float16,float16,0,0.23643199602762857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,0,1,fp8,fp8,0,0.8629706700642904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,0,1,float16,fp8,0,0.9305120309193929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,128,1,float16,fp8,0,0.23840532700220743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,0,1,float16,float16,0,0.9312106768290201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,128,1,fp8,fp8,0,0.2239146629969279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,128,1,float16,float16,0,0.2444480061531067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,0,1,float16,fp8,0,0.9336960315704346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,0,1,fp8,fp8,0,0.8654239972432455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,128,1,float16,fp8,0,0.2467093269030253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,0,1,float16,float16,0,0.9380693435668945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,128,1,fp8,fp8,0,0.2301279902458191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,128,1,float16,float16,0,0.15288000305493674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,0,1,float16,fp8,0,0.9414453506469727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,0,1,fp8,fp8,0,0.8736266295115153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,128,1,float16,fp8,0,0.15677332878112793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,0,1,float16,float16,0,0.5374826590220133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,128,1,fp8,fp8,0,0.1493226687113444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,128,1,float16,float16,0,0.1357919971148173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,0,1,fp8,fp8,0,0.5008586645126343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,0,1,float16,fp8,0,0.5422773361206055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,128,1,float16,fp8,0,0.136053333679835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,0,1,float16,float16,0,0.5157279968261719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,128,1,fp8,fp8,0,0.12594667077064514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,128,1,float16,float16,0,0.13725333412488303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,0,1,float16,fp8,0,0.5193973382314047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,0,1,fp8,fp8,0,0.4777973492940267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,0,1,float16,float16,0,0.5168426831563314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,128,1,float16,fp8,0,0.13806399703025818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,128,1,fp8,fp8,0,0.13008000453313193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,128,1,float16,float16,0,0.14011733730634054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,0,1,float16,fp8,0,0.5204746723175049
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,0,1,fp8,fp8,0,0.48025067647298175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,0,1,float16,float16,0,0.522490660349528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,128,1,float16,fp8,0,0.1421440045038859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,128,1,fp8,fp8,0,0.13632532954216003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,128,1,float16,float16,0,0.11147733529408772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,0,1,float16,float16,0,0.3359893163045247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,0,1,float16,fp8,0,0.5207680066426595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,0,1,fp8,fp8,0,0.4872426589330037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,128,1,float16,fp8,0,0.11154666543006897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,128,1,fp8,fp8,0,0.10603200395901997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,128,1,float16,float16,0,0.11159466703732808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,0,1,float16,fp8,0,0.33527998129526776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,0,1,fp8,fp8,0,0.31037867069244385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,128,1,float16,fp8,0,0.10938133796056111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,0,1,float16,float16,0,0.33506667613983154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,128,1,fp8,fp8,0,0.10532800356547038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,0,1,float16,fp8,0,0.33399999141693115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,128,1,float16,float16,0,0.11010133226712544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,0,1,fp8,fp8,0,0.30853867530822754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,128,1,float16,fp8,0,0.10966400305430095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,0,1,float16,float16,0,0.3333280086517334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,128,1,fp8,fp8,0,0.10356266299883525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,0,1,float16,fp8,0,0.332805335521698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,128,1,float16,float16,0,0.10977066556612651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,0,1,fp8,fp8,0,0.31014933188756305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,0,1,float16,float16,0,0.3349279959996541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,128,1,float16,fp8,0,0.10948800047238667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,128,1,fp8,fp8,0,0.10368000467618306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,0,1,float16,fp8,0,0.3352959950764974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,0,1,fp8,fp8,0,0.3095146616299947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,128,1,float16,float16,0,0.6737333138783773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,128,1,float16,fp8,0,0.679258664449056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,128,1,fp8,fp8,0,0.6177813212076823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,0,1,float16,float16,0,2.3156800270080566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,128,1,float16,float16,0,0.6869653065999349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,0,1,float16,fp8,0,2.317333380381266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,0,1,fp8,fp8,0,2.137824058532715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,128,1,float16,fp8,0,0.6910453637441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,128,1,fp8,fp8,0,0.6318293412526449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,0,1,float16,float16,0,2.501845359802246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,128,1,float16,float16,0,0.7072479724884033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,0,1,float16,fp8,0,2.3322933514912925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,0,1,fp8,fp8,0,2.15121062596639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,128,1,float16,fp8,0,0.711728016535441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,128,1,fp8,fp8,0,0.6551253398259481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,128,1,float16,float16,0,0.3963199853897095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,0,1,float16,float16,0,2.3518080711364746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,0,1,float16,fp8,0,2.372159957885742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,128,1,float16,fp8,0,0.4041813214619954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,0,1,float16,float16,0,1.2463786602020264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,128,1,fp8,fp8,0,0.3772159814834595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,0,1,fp8,fp8,0,2.1751252810160318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,128,1,float16,float16,0,0.3519039948781331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,0,1,float16,fp8,0,1.2555039723714192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,0,1,fp8,fp8,0,1.158794641494751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,128,1,float16,fp8,0,0.3539946476618449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,0,1,float16,float16,0,1.1964586575826008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,128,1,fp8,fp8,0,0.3263360063234965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,128,1,float16,float16,0,0.3569546540578206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,0,1,fp8,fp8,0,1.1076213518778484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,0,1,float16,fp8,0,1.1995999813079834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,128,1,float16,fp8,0,0.3594026565551758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,0,1,float16,float16,0,1.2035786310831706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,128,1,fp8,fp8,0,0.33056533336639404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,128,1,float16,float16,0,0.3652213414510091
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,0,1,float16,fp8,0,1.2038613160451253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,0,1,fp8,fp8,0,1.1138400236765544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,128,1,float16,fp8,0,0.36950933933258057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,0,1,float16,float16,0,1.2104159990946453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,128,1,fp8,fp8,0,0.33923200766245526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,128,1,float16,float16,0,0.2160159945487976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,0,1,float16,fp8,0,1.2189813454945881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,0,1,float16,float16,0,0.6676639715830485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,128,1,float16,fp8,0,0.2215999960899353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,0,1,fp8,fp8,0,1.1232906977335613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,128,1,fp8,fp8,0,0.20788266261418661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,128,1,float16,float16,0,0.18956265846888223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,0,1,fp8,fp8,0,0.6227680047353109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,0,1,float16,fp8,0,0.6720639864603678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,128,1,float16,fp8,0,0.19161067406336466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,0,1,float16,float16,0,0.6397386789321899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,128,1,fp8,fp8,0,0.18104000886281332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,128,1,float16,float16,0,0.1932213306427002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,0,1,float16,fp8,0,0.6391786734263102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,0,1,fp8,fp8,0,0.5950613419214884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,128,1,float16,fp8,0,0.19524266322453818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,0,1,float16,float16,0,0.6403520107269287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,128,1,fp8,fp8,0,0.18281600872675577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,128,1,float16,float16,0,0.1995733380317688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,0,1,float16,fp8,0,0.6440746784210205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,0,1,fp8,fp8,0,0.5981280008951823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,0,1,float16,float16,0,0.6476159890492758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,128,1,float16,fp8,0,0.2018453280131022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,128,1,fp8,fp8,0,0.1895786722501119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,128,1,float16,float16,0,0.12609600027402243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,0,1,float16,float16,0,0.3819520076115926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,128,1,float16,fp8,0,0.12962133685747781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,0,1,float16,fp8,0,0.6512266794840494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,0,1,fp8,fp8,0,0.6036213239034017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,128,1,fp8,fp8,0,0.12409599622090657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,128,1,float16,float16,0,0.11343466242154439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,0,1,fp8,fp8,0,0.3552960157394409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,0,1,float16,fp8,0,0.381658673286438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,128,1,float16,fp8,0,0.11356799801190694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,0,1,float16,float16,0,0.3651626507441203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,128,1,fp8,fp8,0,0.10523733496665955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,0,1,float16,fp8,0,0.3628373146057129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,128,1,float16,float16,0,0.11339199542999268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,0,1,fp8,fp8,0,0.33453865845998126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,0,1,float16,float16,0,0.3665279944737752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,128,1,float16,fp8,0,0.11381866534550984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,128,1,fp8,fp8,0,0.10557333628336589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,0,1,float16,fp8,0,0.36424533526102704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,128,1,float16,float16,0,0.11575466394424438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,0,1,fp8,fp8,0,0.3369866609573364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,0,1,float16,float16,0,0.36850666999816895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,128,1,float16,fp8,0,0.11617066462834676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,128,1,fp8,fp8,0,0.11141332983970642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,128,1,float16,float16,0,0.08917867143948872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,0,1,float16,fp8,0,0.3686026732126872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,0,1,fp8,fp8,0,0.3418933153152466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,0,1,float16,float16,0,0.24272533257802328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,128,1,float16,fp8,0,0.0909440020720164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,128,1,fp8,fp8,0,0.08681066830952962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,0,1,float16,fp8,0,0.24222934246063232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,0,1,fp8,fp8,0,0.22610666354497275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,128,1,float16,float16,0,0.09099733829498291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,0,1,float16,float16,0,0.24355733394622803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,128,1,float16,fp8,0,0.08911466598510742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,128,1,fp8,fp8,0,0.08506133159001668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,128,1,float16,float16,0,0.08898666501045227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,0,1,float16,fp8,0,0.2448586622873942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,0,1,fp8,fp8,0,0.22633065780003866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,128,1,float16,fp8,0,0.09088533123334248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,128,1,fp8,fp8,0,0.0867039958635966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,0,1,float16,float16,0,0.24431999524434408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,0,1,float16,fp8,0,0.24261333545049033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,128,1,float16,float16,0,0.0888426701227824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,0,1,fp8,fp8,0,0.22660799821217856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,128,1,float16,fp8,0,0.08891200025876363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,128,1,fp8,fp8,0,0.08703999718030293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,0,1,float16,float16,0,0.24289600054423013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,0,1,float16,fp8,0,0.24355733394622803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,0,1,fp8,fp8,0,0.226090669631958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,128,1,float16,float16,0,0.5087200005849203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,128,1,float16,fp8,0,0.5119359890619913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,128,1,fp8,fp8,0,0.4638506571451823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,0,1,float16,float16,0,1.42031462987264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,0,1,float16,fp8,0,1.425498644510905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,128,1,float16,float16,0,0.5173813501993815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,0,1,fp8,fp8,0,1.3063519795735676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,128,1,float16,fp8,0,0.52129065990448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,128,1,fp8,fp8,0,0.47485868136088055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,0,1,float16,float16,0,1.4287734031677246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,0,1,float16,fp8,0,1.4335254033406575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,128,1,float16,float16,0,0.5287040074666342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,0,1,fp8,fp8,0,1.3168000380198162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,128,1,fp8,fp8,0,0.49086932341257733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,128,1,float16,fp8,0,0.5340160131454468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,128,1,float16,float16,0,0.30343466997146606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,0,1,float16,float16,0,1.4434506098429363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,0,1,float16,fp8,0,1.4472427368164062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,128,1,float16,fp8,0,0.30824534098307294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,128,1,fp8,fp8,0,0.2866026759147644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,0,1,fp8,fp8,0,1.3343413670857747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,0,1,float16,float16,0,0.7802080313364664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,128,1,float16,float16,0,0.26713067293167114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,0,1,float16,fp8,0,0.7876693407694498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,0,1,fp8,fp8,0,0.7251466910044352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,128,1,float16,fp8,0,0.26875199874242145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,0,1,float16,float16,0,0.7402666409810384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,128,1,fp8,fp8,0,0.24873600403467813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,128,1,float16,float16,0,0.2710346579551697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,0,1,float16,fp8,0,0.7425973415374756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,0,1,fp8,fp8,0,0.6876640319824219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,128,1,float16,fp8,0,0.27384533484776813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,128,1,fp8,fp8,0,0.25429866711298627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,0,1,float16,float16,0,0.7466506958007812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,128,1,float16,float16,0,0.2794133424758911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,0,1,float16,fp8,0,0.7491839726765951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,0,1,fp8,fp8,0,0.6930720011393229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,0,1,float16,float16,0,0.7545333703358968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,128,1,float16,fp8,0,0.28321067492167157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,128,1,fp8,fp8,0,0.26155734062194824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,128,1,float16,float16,0,0.16518400112787882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,0,1,float16,float16,0,0.42821868260701496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,0,1,float16,fp8,0,0.7578186988830566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,128,1,float16,fp8,0,0.1686240037282308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,0,1,fp8,fp8,0,0.698906660079956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,128,1,fp8,fp8,0,0.1590666671593984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,128,1,float16,float16,0,0.14229866862297058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,0,1,fp8,fp8,0,0.3964586655298869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,0,1,float16,fp8,0,0.4293066660563151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,128,1,float16,fp8,0,0.14386133352915445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,0,1,float16,float16,0,0.40226133664449054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,128,1,fp8,fp8,0,0.13397333025932312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,128,1,float16,float16,0,0.14477333426475525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,0,1,float16,fp8,0,0.40245866775512695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,0,1,fp8,fp8,0,0.3731360038121541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,0,1,float16,float16,0,0.4026079972585042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,128,1,float16,fp8,0,0.14684800306955972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,128,1,fp8,fp8,0,0.13994133472442627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,128,1,float16,float16,0,0.14898666739463806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,0,1,float16,fp8,0,0.4077119827270508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,0,1,fp8,fp8,0,0.3775893449783325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,0,1,float16,float16,0,0.4095573425292969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,128,1,float16,fp8,0,0.15195199847221375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,128,1,fp8,fp8,0,0.145087997118632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,128,1,float16,float16,0,0.09326933821042378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,0,1,float16,fp8,0,0.4118826786677043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,0,1,float16,float16,0,0.24743467569351196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,0,1,fp8,fp8,0,0.3822559912999471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,128,1,float16,fp8,0,0.09547733267148335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,128,1,fp8,fp8,0,0.0936853289604187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,0,1,float16,fp8,0,0.2505653301874797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,0,1,fp8,fp8,0,0.23266667127609253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,128,1,float16,float16,0,0.08596266309420268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,0,1,float16,float16,0,0.2376799980799357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,128,1,float16,fp8,0,0.08506666620572408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,128,1,fp8,fp8,0,0.07852800190448761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,0,1,float16,fp8,0,0.2392639915148417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,128,1,float16,float16,0,0.0848479966322581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,0,1,fp8,fp8,0,0.21851734320322672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,0,1,float16,float16,0,0.23707733551661173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,128,1,float16,fp8,0,0.08659733335177104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,128,1,fp8,fp8,0,0.08053866525491078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,128,1,float16,float16,0,0.08673600355784099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,0,1,float16,fp8,0,0.23874133825302124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,0,1,fp8,fp8,0,0.22011200586954752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,128,1,float16,fp8,0,0.08876799543698628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,128,1,fp8,fp8,0,0.0831573357184728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,0,1,float16,float16,0,0.24071999390920004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,128,1,float16,float16,0,0.07057066758473714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,0,1,float16,fp8,0,0.24090667565663657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,0,1,fp8,fp8,0,0.22228266795476279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,128,1,float16,fp8,0,0.068271999557813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,0,1,float16,float16,0,0.1692906618118286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,128,1,fp8,fp8,0,0.06663999954859416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,0,1,float16,fp8,0,0.16797866423924765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,128,1,float16,float16,0,0.06854400038719177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,0,1,fp8,fp8,0,0.15686933199564615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,128,1,float16,fp8,0,0.06877866884072621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,128,1,fp8,fp8,0,0.06631466746330261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,0,1,float16,float16,0,0.16702934106191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,128,1,float16,float16,0,0.06833066542943318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,0,1,float16,fp8,0,0.16732800006866455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,0,1,fp8,fp8,0,0.15665599703788757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,128,1,float16,fp8,0,0.06845866640408833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,128,1,fp8,fp8,0,0.06613866488138835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,0,1,float16,float16,0,0.16877333323160806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,128,1,float16,float16,0,0.0703306645154953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,0,1,float16,fp8,0,0.16871466239293417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,0,1,fp8,fp8,0,0.15681599577267966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,128,1,float16,fp8,0,0.07017066578070323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,128,1,fp8,fp8,0,0.06663466493288676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,0,1,float16,float16,0,0.16877333323160806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,0,1,float16,fp8,0,0.1673439939816793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,0,1,fp8,fp8,0,0.15826132893562317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,128,1,float16,float16,0,0.6721279621124268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,128,1,float16,fp8,0,0.6763412952423096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,128,1,fp8,fp8,0,0.6114933490753174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,0,1,float16,float16,0,1.4344693819681804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,0,1,float16,fp8,0,1.4404106140136719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,128,1,float16,float16,0,0.6858773231506348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,0,1,fp8,fp8,0,1.314517339070638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,128,1,float16,fp8,0,0.6894079844156901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,128,1,fp8,fp8,0,0.6276053190231323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,0,1,float16,float16,0,1.448581377665202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,128,1,float16,float16,0,0.7059306303660074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,0,1,fp8,fp8,0,1.3302186330159504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,0,1,float16,fp8,0,1.4537760416666667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,128,1,float16,fp8,0,0.7086026668548584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,0,1,float16,float16,0,1.4719893137613933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,128,1,fp8,fp8,0,0.6497919956843058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,128,1,float16,float16,0,0.39153067270914715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,0,1,float16,fp8,0,1.4764960606892903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,0,1,float16,float16,0,0.7869919935862223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,128,1,float16,fp8,0,0.3979733387629191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,0,1,fp8,fp8,0,1.3509546915690105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,128,1,fp8,fp8,0,0.3695146640141805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,128,1,float16,float16,0,0.3433386484781901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,0,1,float16,fp8,0,0.7935520013173422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,0,1,fp8,fp8,0,0.7320373058319092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,128,1,float16,fp8,0,0.34543466567993164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,128,1,fp8,fp8,0,0.3162613312403361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,0,1,float16,float16,0,0.7375626564025879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,0,1,float16,fp8,0,0.7398506800333658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,128,1,float16,float16,0,0.349018653233846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,0,1,fp8,fp8,0,0.6798186302185059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,0,1,float16,float16,0,0.7434026400248209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,128,1,fp8,fp8,0,0.3230560024579366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,128,1,float16,fp8,0,0.3519146839777629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,128,1,float16,float16,0,0.35926934083302814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,0,1,float16,fp8,0,0.7447946866353353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,0,1,fp8,fp8,0,0.6865866978963217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,0,1,float16,float16,0,0.7526400089263916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,128,1,float16,fp8,0,0.3630613485972087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,128,1,fp8,fp8,0,0.3324906627337138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,128,1,float16,float16,0,0.20823999245961508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,0,1,float16,fp8,0,0.7576479911804199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,0,1,float16,float16,0,0.4187946716944377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,0,1,fp8,fp8,0,0.6967146396636963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,128,1,float16,fp8,0,0.21196266015370688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,128,1,fp8,fp8,0,0.19957866271336874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,128,1,float16,float16,0,0.18055466810862222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,0,1,float16,fp8,0,0.4220053354899089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,0,1,fp8,fp8,0,0.39250131448109943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,0,1,float16,float16,0,0.39002664883931476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,128,1,float16,fp8,0,0.18307733535766602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,128,1,fp8,fp8,0,0.17086933056513467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,0,1,float16,fp8,0,0.39345598220825195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,128,1,float16,float16,0,0.18469866116841635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,0,1,fp8,fp8,0,0.36346666018168133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,0,1,float16,float16,0,0.3903466860453288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,128,1,float16,fp8,0,0.18721065918604532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,128,1,fp8,fp8,0,0.17298666636149088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,128,1,float16,float16,0,0.19145600001017252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,0,1,float16,fp8,0,0.3956693410873413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,0,1,fp8,fp8,0,0.36577598253885907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,0,1,float16,float16,0,0.3993706703186035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,128,1,float16,fp8,0,0.19210133949915567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,128,1,fp8,fp8,0,0.1805973251660665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,128,1,float16,float16,0,0.11620799700419109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,0,1,float16,fp8,0,0.4020586808522542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,0,1,float16,float16,0,0.23573867479960123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,0,1,fp8,fp8,0,0.37220799922943115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,128,1,float16,fp8,0,0.119077334801356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,128,1,fp8,fp8,0,0.11396267016728719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,0,1,float16,fp8,0,0.23746667305628458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,128,1,float16,float16,0,0.10126933455467224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,0,1,fp8,fp8,0,0.2223893404006958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,128,1,float16,fp8,0,0.10332266489664714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,0,1,float16,float16,0,0.22116265694300333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,128,1,fp8,fp8,0,0.09527466694513957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,128,1,float16,float16,0,0.10148800412813823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,0,1,fp8,fp8,0,0.2018186648686727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,0,1,float16,fp8,0,0.22151466210683188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,128,1,float16,fp8,0,0.10550933082898457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,128,1,fp8,fp8,0,0.09572800000508626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,0,1,float16,float16,0,0.22060799598693848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,128,1,float16,float16,0,0.10558399558067322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,0,1,fp8,fp8,0,0.2036799987157186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,0,1,float16,fp8,0,0.22349333763122559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,128,1,float16,fp8,0,0.10735467076301575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,128,1,fp8,fp8,0,0.09939733147621155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,0,1,float16,float16,0,0.22291199366251627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,128,1,float16,float16,0,0.06853333115577698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,0,1,fp8,fp8,0,0.20803199211756387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,0,1,float16,fp8,0,0.22376533349355063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,0,1,float16,float16,0,0.14244799812634787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,128,1,float16,fp8,0,0.07025599976380666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,128,1,fp8,fp8,0,0.06591466565926869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,128,1,float16,float16,0,0.06422399977842967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,0,1,float16,fp8,0,0.14309333761533102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,0,1,fp8,fp8,0,0.13191466530164084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,128,1,float16,fp8,0,0.06444266438484192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,128,1,fp8,fp8,0,0.06029333174228668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,0,1,float16,float16,0,0.1383999983469645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,128,1,float16,float16,0,0.06440000236034393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,0,1,fp8,fp8,0,0.12544533610343933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,0,1,float16,fp8,0,0.1374559998512268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,128,1,float16,fp8,0,0.06418666740258534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,128,1,fp8,fp8,0,0.060266668597857155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,0,1,float16,float16,0,0.13774933417638144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,128,1,float16,float16,0,0.06427200138568878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,0,1,float16,fp8,0,0.13697600364685059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,0,1,fp8,fp8,0,0.12596799929936728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,128,1,float16,fp8,0,0.06545599798361461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,128,1,fp8,fp8,0,0.06189866860707601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,0,1,float16,float16,0,0.1384106675783793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,128,1,float16,float16,0,0.05590933561325073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,0,1,float16,fp8,0,0.13858133554458618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,0,1,fp8,fp8,0,0.12809066971143088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,0,1,float16,float16,0,0.10134399930636089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,128,1,float16,fp8,0,0.05599466462930044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,128,1,fp8,fp8,0,0.052101333936055504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,128,1,float16,float16,0,0.054010664423306785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,0,1,float16,fp8,0,0.10133866469065349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,0,1,fp8,fp8,0,0.09296533465385437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,0,1,float16,float16,0,0.0993333359559377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,128,1,float16,fp8,0,0.05622399846712748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,128,1,fp8,fp8,0,0.052000001072883606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,0,1,float16,fp8,0,0.1002293328444163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,128,1,float16,float16,0,0.056074668963750206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,0,1,fp8,fp8,0,0.09449066718419392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,128,1,float16,fp8,0,0.05610666672388712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,128,1,fp8,fp8,0,0.051856001218159996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,0,1,float16,float16,0,0.10006933410962422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,128,1,float16,float16,0,0.05602666735649109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,0,1,float16,fp8,0,0.0992746651172638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,0,1,fp8,fp8,0,0.09294399619102478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,128,1,float16,fp8,0,0.05598933498064677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,128,1,fp8,fp8,0,0.05273066461086273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,0,1,float16,float16,0,0.09965866804122925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,0,1,fp8,fp8,0,0.09388267000516255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,0,1,float16,fp8,0,0.09927999973297119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,128,1,float16,float16,0,0.5047893524169922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,128,1,fp8,fp8,0,0.4601813157399495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,128,1,float16,fp8,0,0.509333332379659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,0,1,float16,float16,0,0.9148906866709391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,128,1,float16,float16,0,0.5125066836675009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,0,1,float16,fp8,0,0.9193013509114584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,0,1,fp8,fp8,0,0.8384053707122803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,0,1,float16,float16,0,0.9245546658833822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,128,1,float16,fp8,0,0.5156480073928833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,128,1,fp8,fp8,0,0.4693973461786906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,0,1,float16,fp8,0,0.9300106366475424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,128,1,float16,float16,0,0.5252159833908081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,0,1,fp8,fp8,0,0.8485813140869141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,0,1,float16,float16,0,0.9364480177561442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,128,1,float16,fp8,0,0.5294346809387207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,128,1,fp8,fp8,0,0.48526398340861004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,128,1,float16,float16,0,0.29689600070317584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,0,1,float16,float16,0,0.5144213438034058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,0,1,float16,fp8,0,0.9417333602905273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,0,1,fp8,fp8,0,0.8629706700642904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,128,1,float16,fp8,0,0.30408533414204914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,128,1,fp8,fp8,0,0.28171734015146893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,128,1,float16,float16,0,0.2616906762123108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,0,1,float16,fp8,0,0.519589344660441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,0,1,fp8,fp8,0,0.47994665304819745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,0,1,float16,float16,0,0.4747999906539917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,128,1,float16,fp8,0,0.26470400889714557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,128,1,fp8,fp8,0,0.24203733603159586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,0,1,float16,fp8,0,0.4774826765060425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,128,1,float16,float16,0,0.2654026746749878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,0,1,fp8,fp8,0,0.4389013449350993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,0,1,float16,float16,0,0.4771626790364583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,128,1,float16,fp8,0,0.2672533392906189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,128,1,fp8,fp8,0,0.24715199073155722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,0,1,float16,fp8,0,0.4806133508682251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,0,1,fp8,fp8,0,0.4440000057220459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,128,1,float16,float16,0,0.2723199923833211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,128,1,float16,fp8,0,0.2753973404566447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,128,1,fp8,fp8,0,0.2531200051307678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,0,1,float16,float16,0,0.4852586587270101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,128,1,float16,float16,0,0.15949333707491556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,0,1,float16,fp8,0,0.49107201894124347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,0,1,fp8,fp8,0,0.45081599553426105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,0,1,float16,float16,0,0.27689067522684735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,128,1,float16,fp8,0,0.16320533553759256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,128,1,fp8,fp8,0,0.15416533748308817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,128,1,float16,float16,0,0.1344160040219625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,0,1,float16,fp8,0,0.28011200825373334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,0,1,fp8,fp8,0,0.26124799251556396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,128,1,float16,fp8,0,0.13639466961224875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,128,1,fp8,fp8,0,0.12796266873677573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,0,1,float16,float16,0,0.2526773413022359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,128,1,float16,float16,0,0.13845866918563843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,0,1,float16,fp8,0,0.25485867261886597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,0,1,fp8,fp8,0,0.23448532819747925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,128,1,float16,fp8,0,0.14058132966359457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,128,1,fp8,fp8,0,0.1327946682771047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,0,1,float16,float16,0,0.2565866708755493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,128,1,float16,float16,0,0.1436799963315328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,0,1,float16,fp8,0,0.2571626702944438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,0,1,fp8,fp8,0,0.24054932594299316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,128,1,float16,fp8,0,0.14636266231536865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,128,1,fp8,fp8,0,0.13978667060534158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,0,1,float16,float16,0,0.2608693242073059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,128,1,float16,float16,0,0.08501866459846497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,0,1,float16,fp8,0,0.2618773380915324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,0,1,fp8,fp8,0,0.24675732851028442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,0,1,float16,float16,0,0.15516799688339233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,128,1,float16,fp8,0,0.08794132868448894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,128,1,fp8,fp8,0,0.08755733569463094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,0,1,float16,fp8,0,0.1586133340994517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,128,1,float16,float16,0,0.07685333490371704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,0,1,fp8,fp8,0,0.15027200182278952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,128,1,float16,fp8,0,0.07885866860548656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,128,1,fp8,fp8,0,0.07250666618347168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,0,1,float16,float16,0,0.14781333009401956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,128,1,float16,float16,0,0.07875200112660725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,0,1,float16,fp8,0,0.14913599689801535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,0,1,fp8,fp8,0,0.13615467151006064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,128,1,float16,fp8,0,0.07997333506743114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,128,1,fp8,fp8,0,0.07268266876538594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,0,1,float16,float16,0,0.1488693356513977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,128,1,float16,float16,0,0.0803306649128596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,0,1,float16,fp8,0,0.15000533064206442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,0,1,fp8,fp8,0,0.13637333114941916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,128,1,float16,fp8,0,0.08091199894746144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,128,1,fp8,fp8,0,0.07633066674073537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,0,1,float16,float16,0,0.15004266301790872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,128,1,float16,float16,0,0.05198400219281515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,0,1,float16,fp8,0,0.15227199594179788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,0,1,float16,float16,0,0.09930133819580078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,0,1,fp8,fp8,0,0.138565331697464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,128,1,float16,fp8,0,0.053818667928377785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,128,1,fp8,fp8,0,0.05193066596984863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,0,1,float16,fp8,0,0.1006773312886556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,0,1,fp8,fp8,0,0.09319466352462769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,128,1,float16,float16,0,0.050144001841545105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,128,1,float16,fp8,0,0.050383999943733215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,0,1,float16,float16,0,0.0978666643301646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,128,1,fp8,fp8,0,0.047877331574757896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,0,1,float16,fp8,0,0.0971999963124593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,128,1,float16,float16,0,0.050186668833096824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,0,1,fp8,fp8,0,0.09053867061932881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,0,1,float16,float16,0,0.0979360044002533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,128,1,float16,fp8,0,0.0517493337392807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,128,1,fp8,fp8,0,0.04808000226815542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,0,1,float16,fp8,0,0.09727467099825542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,0,1,fp8,fp8,0,0.09037333726882935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,128,1,float16,float16,0,0.05195199946562449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,0,1,float16,float16,0,0.09743466973304749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,128,1,float16,fp8,0,0.05173333485921224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,128,1,fp8,fp8,0,0.04828799764315287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,0,1,float16,fp8,0,0.09716799855232239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,128,1,float16,float16,0,0.04165866722663244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,0,1,fp8,fp8,0,0.09130133191744487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,0,1,float16,float16,0,0.06858666737874348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,128,1,float16,fp8,0,0.041738669077555336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,128,1,fp8,fp8,0,0.03972266614437103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,0,1,float16,fp8,0,0.06866666674613953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,0,1,fp8,fp8,0,0.06446399788061778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,128,1,float16,float16,0,0.04179200033346812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,0,1,float16,float16,0,0.06864533325036366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,128,1,float16,fp8,0,0.042208001017570496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,128,1,fp8,fp8,0,0.03955733279387156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,0,1,float16,fp8,0,0.07021866738796234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,0,1,fp8,fp8,0,0.06633066634337108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,128,1,float16,float16,0,0.04189866781234741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,0,1,float16,float16,0,0.06835733354091644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,128,1,float16,fp8,0,0.042026668787002563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,128,1,fp8,fp8,0,0.03955733279387156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,0,1,float16,fp8,0,0.06961066524187724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,0,1,fp8,fp8,0,0.06609066824118297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,128,1,float16,float16,0,0.041738669077555336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,128,1,float16,fp8,0,0.041562666495641075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,0,1,float16,float16,0,0.06931200126806895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,128,1,fp8,fp8,0,0.03976000100374222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,0,1,float16,fp8,0,0.06826133529345195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,0,1,fp8,fp8,0,0.06451733410358429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,128,1,float16,float16,0,0.7047573725382487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,128,1,float16,fp8,0,0.7001279989878336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,0,1,float16,float16,0,1.0252959728240967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,128,1,fp8,fp8,0,0.6311253309249878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,0,1,float16,fp8,0,1.0255733331044514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,0,1,fp8,fp8,0,0.9230613708496094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,128,1,float16,float16,0,0.7226506868998209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,128,1,float16,fp8,0,0.7195733388264974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,0,1,float16,float16,0,1.0411307017008464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,128,1,fp8,fp8,0,0.6342133283615112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,128,1,float16,float16,0,0.7394186655680338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,0,1,float16,fp8,0,1.0423519611358643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,0,1,fp8,fp8,0,0.9291146596272787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,128,1,fp8,fp8,0,0.6588266690572103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,128,1,float16,fp8,0,0.7349599997202555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,0,1,float16,float16,0,1.06113600730896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,128,1,float16,float16,0,0.4035946528116862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,0,1,fp8,fp8,0,0.9519306818644205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,0,1,float16,fp8,0,1.0585546493530273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,128,1,float16,fp8,0,0.4033759832382202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,0,1,float16,float16,0,0.5726879835128784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,128,1,fp8,fp8,0,0.3741919994354248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,0,1,float16,fp8,0,0.5706719954808553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,0,1,fp8,fp8,0,0.528709332148234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,128,1,float16,float16,0,0.35013333956400555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,0,1,float16,float16,0,0.5132319927215576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,128,1,float16,fp8,0,0.35045866171518963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,128,1,fp8,fp8,0,0.31940799951553345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,0,1,float16,fp8,0,0.5181920131047567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,0,1,fp8,fp8,0,0.47024532159169513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,128,1,float16,float16,0,0.35463468233744305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,0,1,float16,float16,0,0.5207200050354004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,128,1,float16,fp8,0,0.35766398906707764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,128,1,fp8,fp8,0,0.32519465684890747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,0,1,float16,fp8,0,0.5220319827397665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,0,1,fp8,fp8,0,0.478495995203654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,128,1,float16,float16,0,0.3668373425801595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,0,1,float16,float16,0,0.5324319998423258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,128,1,float16,fp8,0,0.36692265669504803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,128,1,fp8,fp8,0,0.3349333206812541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,128,1,float16,float16,0,0.20791999499003092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,0,1,float16,fp8,0,0.5342080195744833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,0,1,float16,float16,0,0.2974666754404704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,0,1,fp8,fp8,0,0.48843733469645184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,128,1,float16,fp8,0,0.21095999081929526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,128,1,fp8,fp8,0,0.1975946625073751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,0,1,float16,fp8,0,0.301258663336436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,0,1,fp8,fp8,0,0.2775839964548747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,128,1,float16,float16,0,0.17858133713404337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,0,1,float16,float16,0,0.26577067375183105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,128,1,fp8,fp8,0,0.16674133141835532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,128,1,float16,fp8,0,0.179258664449056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,0,1,float16,fp8,0,0.26702932516733807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,128,1,float16,float16,0,0.18026665846506754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,0,1,fp8,fp8,0,0.24992533524831137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,0,1,float16,float16,0,0.26945600907007855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,128,1,float16,fp8,0,0.182970662911733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,128,1,fp8,fp8,0,0.17088532447814941
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,0,1,float16,fp8,0,0.2698240081469218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,128,1,float16,float16,0,0.18954133987426758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,0,1,fp8,fp8,0,0.25305600961049396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,0,1,float16,float16,0,0.2784159978230794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,128,1,float16,fp8,0,0.1908373236656189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,128,1,fp8,fp8,0,0.17733333508173624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,0,1,float16,fp8,0,0.27963199218114215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,128,1,float16,float16,0,0.11187733213106792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,0,1,float16,float16,0,0.16193600495656332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,0,1,fp8,fp8,0,0.25941334168116253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,128,1,float16,fp8,0,0.11524266997973125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,128,1,fp8,fp8,0,0.10935999949773152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,0,1,float16,fp8,0,0.16428266962369284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,0,1,fp8,fp8,0,0.15481066703796387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,128,1,float16,float16,0,0.09688533345858256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,0,1,float16,float16,0,0.1486240029335022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,128,1,float16,fp8,0,0.09918933113416036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,128,1,fp8,fp8,0,0.0888426701227824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,0,1,float16,fp8,0,0.14758400122324625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,0,1,fp8,fp8,0,0.13557866215705872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,128,1,float16,float16,0,0.09699733058611552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,0,1,float16,float16,0,0.1476800044377645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,128,1,float16,fp8,0,0.10080533226331075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,128,1,fp8,fp8,0,0.09113066395123799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,0,1,float16,fp8,0,0.14855999747912088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,0,1,fp8,fp8,0,0.1362506647904714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,128,1,float16,float16,0,0.101200004418691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,0,1,float16,float16,0,0.15060800313949585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,128,1,float16,fp8,0,0.10257599751154582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,128,1,fp8,fp8,0,0.09492799639701843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,0,1,float16,fp8,0,0.15222400426864624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,0,1,fp8,fp8,0,0.14044266939163208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,128,1,float16,float16,0,0.06427733103434245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,0,1,float16,float16,0,0.09512533744176228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,128,1,float16,fp8,0,0.06477333108584087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,128,1,fp8,fp8,0,0.0620959997177124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,0,1,float16,fp8,0,0.09750399986902873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,0,1,fp8,fp8,0,0.08905067046483357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,128,1,float16,float16,0,0.05829866727193197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,0,1,float16,float16,0,0.09136533737182617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,128,1,float16,fp8,0,0.05922666688760122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,128,1,fp8,fp8,0,0.05420266588528951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,0,1,float16,fp8,0,0.09104532996813457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,0,1,fp8,fp8,0,0.08337066570917766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,128,1,float16,float16,0,0.060165335734685264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,0,1,float16,float16,0,0.09090666969617207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,128,1,float16,fp8,0,0.06044800082842509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,128,1,fp8,fp8,0,0.05601066847642263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,0,1,float16,fp8,0,0.0925333301226298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,0,1,fp8,fp8,0,0.08473599950472514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,128,1,float16,float16,0,0.060346667965253196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,0,1,float16,float16,0,0.09175999959309895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,128,1,float16,fp8,0,0.0620959997177124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,128,1,fp8,fp8,0,0.05635199944178263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,0,1,float16,fp8,0,0.09316800038019817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,0,1,fp8,fp8,0,0.0851146678129832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,128,1,float16,float16,0,0.04577599962552389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,0,1,float16,float16,0,0.06583466629187266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,128,1,float16,fp8,0,0.045647998650868736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,128,1,fp8,fp8,0,0.043738668163617454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,0,1,float16,fp8,0,0.06648533542950948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,0,1,fp8,fp8,0,0.06001600126425425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,128,1,float16,float16,0,0.04381866753101349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,0,1,float16,float16,0,0.06387733419736226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,128,1,float16,fp8,0,0.04331733286380768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,128,1,fp8,fp8,0,0.03994666785001755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,0,1,float16,fp8,0,0.0632479985555013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,0,1,fp8,fp8,0,0.05826666454474131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,128,1,float16,float16,0,0.04158399999141693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,128,1,float16,fp8,0,0.04384533564249674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,0,1,float16,float16,0,0.06252799928188324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,128,1,fp8,fp8,0,0.03963200002908707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,0,1,float16,fp8,0,0.06399466594060262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,128,1,float16,float16,0,0.04385066529115041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,0,1,fp8,fp8,0,0.058687999844551086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,128,1,float16,fp8,0,0.043696001172065735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,128,1,fp8,fp8,0,0.04154133299986521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,0,1,float16,float16,0,0.06431999802589417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,128,1,float16,float16,0,0.03542399903138479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,0,1,float16,fp8,0,0.06427200138568878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,0,1,fp8,fp8,0,0.058245331048965454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,128,1,float16,fp8,0,0.035418666899204254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,0,1,float16,float16,0,0.052069331208864846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,128,1,fp8,fp8,0,0.03331733246644338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,0,1,float16,fp8,0,0.05177066723505656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,0,1,fp8,fp8,0,0.04770666857560476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,128,1,float16,float16,0,0.035258665680885315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,128,1,float16,fp8,0,0.03536533315976461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,0,1,float16,float16,0,0.051685333251953125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,128,1,fp8,fp8,0,0.03331733246644338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,0,1,float16,fp8,0,0.05011733373006185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,128,1,float16,float16,0,0.03347733368476232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,0,1,fp8,fp8,0,0.048170665899912514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,0,1,float16,float16,0,0.049957334995269775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,128,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,128,1,fp8,fp8,0,0.033520000676314034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,128,1,float16,float16,0,0.03547733277082443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,0,1,float16,fp8,0,0.05002133548259735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,0,1,fp8,fp8,0,0.04826133449872335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,128,1,float16,fp8,0,0.035429333647092186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,0,1,float16,float16,0,0.05188799897829691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,128,1,fp8,fp8,0,0.03345600018898646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,0,1,float16,fp8,0,0.05017066498597463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,0,1,fp8,fp8,0,0.047728002071380615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,128,1,float16,float16,0,0.5016106764475504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,128,1,float16,fp8,0,0.5039466619491577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,0,1,float16,float16,0,0.6594293514887491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,128,1,fp8,fp8,0,0.4575146834055583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,0,1,float16,fp8,0,0.6644159952799479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,0,1,fp8,fp8,0,0.6012426614761353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,128,1,float16,float16,0,0.5142613252003988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,0,1,float16,float16,0,0.6761279900868734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,128,1,float16,fp8,0,0.5179359912872314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,128,1,fp8,fp8,0,0.46779731909434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,0,1,fp8,fp8,0,0.6151146491368612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,0,1,float16,fp8,0,0.6780052979787191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,128,1,float16,float16,0,0.5281173388163248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,0,1,float16,float16,0,0.6874026457468668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,128,1,float16,fp8,0,0.532042662302653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,128,1,fp8,fp8,0,0.4814079999923706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,0,1,float16,fp8,0,0.692512035369873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,128,1,float16,float16,0,0.2946880062421163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,0,1,fp8,fp8,0,0.6280159950256348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,0,1,float16,float16,0,0.38235199451446533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,128,1,float16,fp8,0,0.30423466364542645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,128,1,fp8,fp8,0,0.2836693326632182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,0,1,float16,fp8,0,0.3893706798553467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,0,1,fp8,fp8,0,0.3601919809977214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,128,1,float16,float16,0,0.25752532482147217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,0,1,float16,float16,0,0.3423413435618083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,128,1,float16,fp8,0,0.2593066692352295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,128,1,fp8,fp8,0,0.23948800563812256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,0,1,float16,fp8,0,0.34303998947143555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,0,1,fp8,fp8,0,0.316101332505544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,128,1,float16,float16,0,0.2613226572672526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,128,1,float16,fp8,0,0.2636959950129191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,0,1,float16,float16,0,0.3442560036977132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,128,1,fp8,fp8,0,0.24660267432530722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,0,1,float16,fp8,0,0.34835731983184814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,0,1,fp8,fp8,0,0.3227786620457967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,128,1,float16,float16,0,0.2706720034281413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,128,1,float16,fp8,0,0.27347733577092487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,0,1,float16,float16,0,0.35411731402079266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,128,1,fp8,fp8,0,0.25228265921274823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,128,1,float16,float16,0,0.15711999932924905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,0,1,float16,fp8,0,0.35837332407633465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,0,1,fp8,fp8,0,0.3287840088208516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,0,1,float16,float16,0,0.20430399974187216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,128,1,float16,fp8,0,0.16199466586112976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,128,1,fp8,fp8,0,0.1527253290017446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,128,1,float16,float16,0,0.13265066345532736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,0,1,float16,fp8,0,0.20773333311080933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,0,1,fp8,fp8,0,0.194757342338562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,128,1,float16,fp8,0,0.13432533542315164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,0,1,float16,float16,0,0.17922133207321167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,128,1,fp8,fp8,0,0.12563733259836832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,128,1,float16,float16,0,0.13522666692733765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,0,1,fp8,fp8,0,0.16862932840983072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,0,1,float16,fp8,0,0.18270933628082275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,128,1,float16,fp8,0,0.1386613349119822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,0,1,float16,float16,0,0.18106667200724283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,128,1,fp8,fp8,0,0.1307146648565928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,128,1,float16,float16,0,0.14153066277503967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,0,1,float16,fp8,0,0.18459200859069824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,0,1,fp8,fp8,0,0.17112533251444498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,128,1,float16,fp8,0,0.14401599764823914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,128,1,fp8,fp8,0,0.1360053320725759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,0,1,float16,float16,0,0.18672533830006918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,128,1,float16,float16,0,0.08435733119646709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,0,1,float16,fp8,0,0.18967467546463013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,0,1,fp8,fp8,0,0.1781866749127706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,0,1,float16,float16,0,0.1114453375339508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,128,1,float16,fp8,0,0.0865226686000824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,128,1,fp8,fp8,0,0.08583999673525493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,0,1,float16,fp8,0,0.11385066310564677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,0,1,fp8,fp8,0,0.11085866888364156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,128,1,float16,float16,0,0.075013334552447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,0,1,float16,float16,0,0.10257066289583842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,128,1,float16,fp8,0,0.07657066484292348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,128,1,fp8,fp8,0,0.06823466718196869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,0,1,float16,fp8,0,0.10336533188819885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,128,1,float16,float16,0,0.0746613343556722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,0,1,fp8,fp8,0,0.09533333778381348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,0,1,float16,float16,0,0.10223467151323955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,128,1,float16,fp8,0,0.07649066547552745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,128,1,fp8,fp8,0,0.07052800059318542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,0,1,float16,fp8,0,0.1037600040435791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,128,1,float16,float16,0,0.07674666742483775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,0,1,fp8,fp8,0,0.09538132945696513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,0,1,float16,float16,0,0.10333866874376933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,128,1,fp8,fp8,0,0.07272000114123027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,128,1,float16,fp8,0,0.0787306676308314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,128,1,float16,float16,0,0.04983466863632202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,0,1,fp8,fp8,0,0.09819733103116353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,0,1,float16,fp8,0,0.10621866583824158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,0,1,float16,float16,0,0.07037866612275441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,128,1,float16,fp8,0,0.049813335140546165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,128,1,fp8,fp8,0,0.04993066688378652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,0,1,float16,fp8,0,0.07072533170382182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,0,1,fp8,fp8,0,0.06613866488138835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,128,1,float16,float16,0,0.04704533517360687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,0,1,float16,float16,0,0.06700266897678375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,128,1,float16,fp8,0,0.04604800045490265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,128,1,fp8,fp8,0,0.04362666606903076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,0,1,float16,fp8,0,0.06832000116507213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,128,1,float16,float16,0,0.04698666433493296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,0,1,fp8,fp8,0,0.0609440008799235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,0,1,float16,float16,0,0.06625066697597504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,128,1,float16,fp8,0,0.04710400104522705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,128,1,fp8,fp8,0,0.04390933116277059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,0,1,fp8,fp8,0,0.062261333068211876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,128,1,float16,float16,0,0.0476746658484141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,0,1,float16,fp8,0,0.06693333387374878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,0,1,float16,float16,0,0.0682826687892278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,128,1,float16,fp8,0,0.04790399968624115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,128,1,fp8,fp8,0,0.04515199859937032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,0,1,float16,fp8,0,0.06909866631031036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,0,1,fp8,fp8,0,0.06409599880377452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,128,1,float16,float16,0,0.033344000577926636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,0,1,float16,float16,0,0.04387733340263367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,128,1,float16,fp8,0,0.035216001172860466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,128,1,fp8,fp8,0,0.03207999964555105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,0,1,float16,fp8,0,0.04528533418973287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,0,1,fp8,fp8,0,0.04383466641108195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,128,1,float16,float16,0,0.03350399931271871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,0,1,float16,float16,0,0.0439573327700297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,128,1,float16,fp8,0,0.03133866687615713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,128,1,fp8,fp8,0,0.031701333820819855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,0,1,fp8,fp8,0,0.04118400067090988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,128,1,float16,float16,0,0.031317333380381264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,0,1,float16,fp8,0,0.043765331308046974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,0,1,float16,float16,0,0.04383466641108195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,128,1,float16,fp8,0,0.03355200091997782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,128,1,fp8,fp8,0,0.031343999008337654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,0,1,float16,fp8,0,0.04376000165939331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,0,1,fp8,fp8,0,0.04177066683769226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,128,1,float16,float16,0,0.03345600018898646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,0,1,float16,float16,0,0.04341333111127218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,128,1,float16,fp8,0,0.033743999898433685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,128,1,fp8,fp8,0,0.03156266609827677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,0,1,float16,fp8,0,0.04398400088151296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,0,1,fp8,fp8,0,0.04195733368396759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,128,1,float16,float16,0,0.03129599988460541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,0,1,float16,float16,0,0.04178133110205332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,128,1,float16,fp8,0,0.03178133318821589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,128,1,fp8,fp8,0,0.029232000311215717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,0,1,float16,fp8,0,0.04326933125654856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,0,1,fp8,fp8,0,0.03979199876387914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,128,1,float16,float16,0,0.029333333174387615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,0,1,float16,float16,0,0.04222933451334635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,128,1,float16,fp8,0,0.030261332790056866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,128,1,fp8,fp8,0,0.02956266701221466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,0,1,float16,fp8,0,0.042266666889190674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,0,1,fp8,fp8,0,0.03974399964014689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,128,1,float16,float16,0,0.03143999973932902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,0,1,float16,float16,0,0.04174399872620901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,128,1,float16,fp8,0,0.031311998764673867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,128,1,fp8,fp8,0,0.029167999823888142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,0,1,float16,fp8,0,0.04317333300908407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,0,1,fp8,fp8,0,0.03945599993069967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,128,1,float16,float16,0,0.029802667597929638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,0,1,float16,float16,0,0.04181333382924398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,128,1,float16,fp8,0,0.031680000325044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,128,1,fp8,fp8,0,0.029322666426499683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,0,1,float16,fp8,0,0.04274133344491323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,0,1,fp8,fp8,0,0.039850667119026184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,128,1,float16,float16,0,0.5862133502960205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,128,1,float16,fp8,0,0.5852853457132975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,0,1,float16,float16,0,0.6899306774139404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,128,1,fp8,fp8,0,0.5390719970067342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,0,1,fp8,fp8,0,0.634341319402059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,0,1,float16,fp8,0,0.6875893274943033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,128,1,float16,float16,0,0.6004266738891602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,128,1,float16,fp8,0,0.5963146686553955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,0,1,float16,float16,0,0.7025120258331299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,128,1,fp8,fp8,0,0.5695573488871256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,0,1,float16,fp8,0,0.6988373597462972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,0,1,fp8,fp8,0,0.6625440120697021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,128,1,float16,float16,0,0.6033493280410767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,128,1,float16,fp8,0,0.5984640121459961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,0,1,float16,float16,0,0.7064159711201986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,128,1,fp8,fp8,0,0.5750826597213745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,128,1,float16,float16,0,0.3327999909718831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,0,1,fp8,fp8,0,0.6690933704376221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,0,1,float16,float16,0,0.38865065574645996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,0,1,float16,fp8,0,0.7036533355712891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,128,1,float16,fp8,0,0.32494399944941205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,128,1,fp8,fp8,0,0.32018667459487915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,0,1,float16,fp8,0,0.3824640115102132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,128,1,float16,float16,0,0.3012479941050212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,0,1,fp8,fp8,0,0.3697333335876465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,0,1,float16,float16,0,0.35675732294718426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,128,1,float16,fp8,0,0.3020799954732259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,128,1,fp8,fp8,0,0.2789280017217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,0,1,float16,fp8,0,0.3574026823043823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,128,1,float16,float16,0,0.30825599034627277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,0,1,fp8,fp8,0,0.32894400755564374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,0,1,float16,float16,0,0.36180798212687176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,128,1,float16,fp8,0,0.30847465991973877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,128,1,fp8,fp8,0,0.29711999495824176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,0,1,float16,fp8,0,0.360368013381958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,0,1,fp8,fp8,0,0.3439893325169881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,128,1,float16,float16,0,0.31070399284362793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,0,1,float16,float16,0,0.3654186725616455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,128,1,float16,fp8,0,0.3092533349990845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,128,1,fp8,fp8,0,0.2985173265139262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,128,1,float16,float16,0,0.17724267641703287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,0,1,float16,fp8,0,0.36340800921122235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,0,1,fp8,fp8,0,0.3473600149154663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,0,1,float16,float16,0,0.20709866285324097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,128,1,float16,fp8,0,0.17300266027450562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,128,1,fp8,fp8,0,0.17078399658203125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,0,1,float16,fp8,0,0.2036479910214742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,0,1,fp8,fp8,0,0.19658666849136353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,128,1,float16,float16,0,0.159770667552948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,0,1,float16,float16,0,0.18957332770029703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,128,1,float16,fp8,0,0.1591146687666575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,128,1,fp8,fp8,0,0.1490506629149119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,0,1,float16,fp8,0,0.18786666790644327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,0,1,fp8,fp8,0,0.17550933361053467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,128,1,float16,float16,0,0.1625706652800242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,0,1,float16,float16,0,0.18968000014623007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,128,1,float16,fp8,0,0.1622773309548696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,128,1,fp8,fp8,0,0.15452266732851663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,0,1,float16,fp8,0,0.19102933009465536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,0,1,fp8,fp8,0,0.1811413367589315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,128,1,float16,float16,0,0.1643946667512258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,0,1,float16,float16,0,0.1935840050379435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,128,1,float16,fp8,0,0.16524266203244528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,128,1,fp8,fp8,0,0.15899733702341715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,0,1,float16,fp8,0,0.1934773325920105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,128,1,float16,float16,0,0.0978559950987498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,0,1,fp8,fp8,0,0.18531199296315512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,0,1,float16,float16,0,0.11586133639017741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,128,1,float16,fp8,0,0.0972106655438741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,128,1,fp8,fp8,0,0.09685867031415303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,0,1,float16,fp8,0,0.11460799972216289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,0,1,fp8,fp8,0,0.11308800180753072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,128,1,float16,float16,0,0.08894399801890056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,0,1,float16,float16,0,0.10749866565068562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,128,1,float16,fp8,0,0.08961600065231323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,128,1,fp8,fp8,0,0.08054933448632558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,0,1,float16,fp8,0,0.10700800021489461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,0,1,fp8,fp8,0,0.09701333443323772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,128,1,float16,float16,0,0.08891733487447102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,0,1,float16,float16,0,0.10785067081451416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,128,1,float16,fp8,0,0.08933867017428081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,128,1,fp8,fp8,0,0.08249066770076752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,0,1,float16,fp8,0,0.1074079970518748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,0,1,fp8,fp8,0,0.09872532884279887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,128,1,float16,float16,0,0.09107200304667155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,0,1,float16,float16,0,0.10759466886520386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,128,1,float16,fp8,0,0.09134933352470398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,128,1,fp8,fp8,0,0.08557867010434468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,0,1,fp8,fp8,0,0.10101866722106934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,0,1,float16,fp8,0,0.10956266522407532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,128,1,float16,float16,0,0.0562666654586792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,128,1,float16,fp8,0,0.056517332792282104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,0,1,float16,float16,0,0.0662773350874583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,128,1,fp8,fp8,0,0.05385600030422211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,0,1,float16,fp8,0,0.06677866478761037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,0,1,fp8,fp8,0,0.06411733229955037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,128,1,float16,float16,0,0.053904001911481224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,0,1,float16,float16,0,0.06433600187301636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,128,1,float16,fp8,0,0.05415999889373779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,128,1,fp8,fp8,0,0.051311999559402466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,0,1,fp8,fp8,0,0.06025066475073496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,0,1,float16,fp8,0,0.06504533191521962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,128,1,float16,float16,0,0.05409599840641022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,0,1,float16,float16,0,0.06440000236034393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,128,1,float16,fp8,0,0.05540800094604492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,128,1,fp8,fp8,0,0.052058666944503784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,0,1,float16,fp8,0,0.06603200236956279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,128,1,float16,float16,0,0.054570664962132774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,0,1,fp8,fp8,0,0.06159466505050659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,0,1,float16,float16,0,0.06636266907056172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,128,1,float16,fp8,0,0.05417599777380625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,128,1,fp8,fp8,0,0.05230399966239929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,128,1,float16,float16,0,0.038959999879201256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,0,1,float16,fp8,0,0.06647466619809468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,0,1,fp8,fp8,0,0.06084266801675161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,128,1,float16,fp8,0,0.03995733211437861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,128,1,fp8,fp8,0,0.03743999948104223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,0,1,float16,float16,0,0.04570133487383524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,0,1,float16,fp8,0,0.0458186666170756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,0,1,fp8,fp8,0,0.0436160018046697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,128,1,float16,float16,0,0.03846933444341024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,0,1,float16,float16,0,0.04383466641108195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,128,1,float16,fp8,0,0.03751466671625773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,128,1,fp8,fp8,0,0.03695466617743174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,0,1,float16,fp8,0,0.044250667095184326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,0,1,fp8,fp8,0,0.04238399863243103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,128,1,float16,float16,0,0.03842666745185852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,128,1,float16,fp8,0,0.03948266555865606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,0,1,float16,float16,0,0.0450133333603541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,128,1,fp8,fp8,0,0.0367253323396047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,0,1,float16,fp8,0,0.04584533472855886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,0,1,fp8,fp8,0,0.04382933179537455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,128,1,float16,float16,0,0.03801066676775614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,0,1,float16,float16,0,0.04468800127506256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,128,1,float16,fp8,0,0.039664000272750854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,128,1,fp8,fp8,0,0.03660800059636434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,0,1,float16,fp8,0,0.04490133126576742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,128,1,float16,float16,0,0.027535999814669292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,0,1,fp8,fp8,0,0.04334400097529093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,0,1,float16,float16,0,0.03338133295377096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,128,1,fp8,fp8,0,0.02739199995994568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,128,1,float16,fp8,0,0.02824000020821889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,0,1,float16,fp8,0,0.033546666304270424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,0,1,fp8,fp8,0,0.033045334120591484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,128,1,float16,float16,0,0.02604266752799352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,0,1,float16,float16,0,0.03136000037193298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,128,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,128,1,fp8,fp8,0,0.025663999219735462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,0,1,float16,fp8,0,0.03156800071398417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,0,1,fp8,fp8,0,0.03136533250411352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,128,1,float16,float16,0,0.027376001079877216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,0,1,float16,float16,0,0.032229334115982056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,128,1,float16,fp8,0,0.02735466758410136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,128,1,fp8,fp8,0,0.02743999908367793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,0,1,float16,fp8,0,0.032405334214369454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,0,1,fp8,fp8,0,0.03151999910672506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,128,1,float16,float16,0,0.02773866554101308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,0,1,float16,float16,0,0.033557333052158356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,128,1,float16,fp8,0,0.027215999861558277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,0,1,float16,fp8,0,0.03379199902216593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,128,1,fp8,fp8,0,0.027461332579453785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,0,1,fp8,fp8,0,0.03338133295377096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,128,1,float16,float16,0,0.023103999594847362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,0,1,float16,float16,0,0.029418667157491047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,128,1,float16,fp8,0,0.023013333479563396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,128,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,0,1,float16,fp8,0,0.029311999678611755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,0,1,fp8,fp8,0,0.02743999908367793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,128,1,float16,float16,0,0.023077333966890972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,0,1,float16,float16,0,0.029258665939172108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,128,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,128,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,0,1,float16,fp8,0,0.02921066681543986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,0,1,fp8,fp8,0,0.02741333345572154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,128,1,float16,float16,0,0.023365333676338196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,0,1,float16,float16,0,0.028698667883872986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,128,1,float16,fp8,0,0.02295999974012375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,128,1,fp8,fp8,0,0.02288000037272771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,0,1,float16,fp8,0,0.029253333806991577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,0,1,fp8,fp8,0,0.027514666318893433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,0,1,float16,float16,0,0.029333333174387615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,128,1,float16,fp8,0,0.02333866556485494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,128,1,float16,float16,0,0.02420266717672348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,128,1,fp8,fp8,0,0.023247999449570973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,0,1,float16,fp8,0,0.02958933264017105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,0,1,fp8,fp8,0,0.028213332096735638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,128,1,float16,float16,0,0.5704053243001302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,0,1,float16,float16,0,0.5785813331604004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,128,1,float16,fp8,0,0.5671360095342001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,128,1,fp8,fp8,0,0.5187093416849772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,0,1,float16,fp8,0,0.5754133462905884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,0,1,fp8,fp8,0,0.5260853370030721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,128,1,float16,float16,0,0.5814506610234579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,0,1,float16,float16,0,0.5892533461252848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,128,1,float16,fp8,0,0.5779039859771729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,128,1,fp8,fp8,0,0.5489013195037842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,0,1,float16,fp8,0,0.5868800083796183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,0,1,fp8,fp8,0,0.555951992670695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,128,1,float16,float16,0,0.5869866609573364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,0,1,float16,float16,0,0.5943839947382609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,128,1,float16,fp8,0,0.5823466777801514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,128,1,fp8,fp8,0,0.5548906723658243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,0,1,float16,fp8,0,0.590778668721517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,0,1,fp8,fp8,0,0.5595466693242391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,128,1,float16,float16,0,0.3223466674486796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,0,1,float16,float16,0,0.3288480043411255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,128,1,fp8,fp8,0,0.30777066946029663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,128,1,float16,fp8,0,0.31588266293207806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,0,1,float16,fp8,0,0.3206239938735962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,0,1,fp8,fp8,0,0.31166932980219525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,128,1,float16,float16,0,0.29291733105977374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,0,1,float16,float16,0,0.298581341902415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,128,1,float16,fp8,0,0.2927093307177226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,128,1,fp8,fp8,0,0.2699679931004842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,0,1,float16,fp8,0,0.29611732562383014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,0,1,fp8,fp8,0,0.27244800329208374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,128,1,float16,float16,0,0.29926933844884235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,0,1,float16,float16,0,0.3042880098025004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,128,1,float16,fp8,0,0.29773332675298053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,128,1,fp8,fp8,0,0.28862400849660236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,0,1,float16,fp8,0,0.3014826575915019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,0,1,fp8,fp8,0,0.28774933020273846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,128,1,float16,float16,0,0.3011839985847473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,0,1,float16,float16,0,0.3059200048446655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,128,1,float16,fp8,0,0.2996373375256856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,128,1,fp8,fp8,0,0.2855946620305379
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,0,1,float16,fp8,0,0.3031093279520671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,128,1,float16,float16,0,0.17087999979654947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,0,1,float16,float16,0,0.17324266831080118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,0,1,fp8,fp8,0,0.290010670820872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,128,1,float16,fp8,0,0.167194664478302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,128,1,fp8,fp8,0,0.1650879979133606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,0,1,float16,fp8,0,0.1716053287188212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,0,1,fp8,fp8,0,0.16684265931447348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,128,1,float16,float16,0,0.15466666221618652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,0,1,float16,float16,0,0.1586240033308665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,128,1,float16,fp8,0,0.15653333067893982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,128,1,fp8,fp8,0,0.143994669119517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,0,1,float16,fp8,0,0.1569386621316274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,0,1,fp8,fp8,0,0.14476266503334045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,128,1,float16,float16,0,0.15819199879964194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,0,1,float16,float16,0,0.16148266196250916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,128,1,float16,fp8,0,0.15843199690183005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,128,1,fp8,fp8,0,0.149317334095637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,0,1,float16,fp8,0,0.16100266575813293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,0,1,fp8,fp8,0,0.1529813309510549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,128,1,float16,float16,0,0.16102932890256247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,0,1,float16,float16,0,0.16259732842445374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,128,1,float16,fp8,0,0.16037333011627197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,128,1,fp8,fp8,0,0.15317866206169128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,0,1,fp8,fp8,0,0.15573333700497946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,0,1,float16,fp8,0,0.16085333625475565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,128,1,float16,float16,0,0.09514133135477702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,0,1,float16,float16,0,0.09563733140627544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,128,1,float16,fp8,0,0.09358933568000793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,128,1,fp8,fp8,0,0.09293333689371745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,0,1,float16,fp8,0,0.09482666850090027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,128,1,float16,float16,0,0.08713600039482117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,0,1,fp8,fp8,0,0.09325333436330159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,128,1,float16,fp8,0,0.08752000331878662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,0,1,float16,float16,0,0.08874666690826416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,128,1,fp8,fp8,0,0.0788320004940033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,0,1,float16,fp8,0,0.08678932984670003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,0,1,fp8,fp8,0,0.08083199958006541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,128,1,float16,float16,0,0.08867733677228291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,0,1,float16,float16,0,0.08912000060081482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,128,1,float16,fp8,0,0.08679999907811482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,128,1,fp8,fp8,0,0.08086933195590973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,0,1,float16,fp8,0,0.08966933687527974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,0,1,fp8,fp8,0,0.08229333162307739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,128,1,float16,float16,0,0.08891200025876363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,0,1,float16,float16,0,0.0902346670627594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,128,1,float16,fp8,0,0.08912000060081482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,128,1,fp8,fp8,0,0.0842186709245046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,0,1,float16,fp8,0,0.08936533331871033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,0,1,fp8,fp8,0,0.08513599634170532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,128,1,float16,float16,0,0.05575466652711233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,128,1,float16,fp8,0,0.05440000196297964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,128,1,fp8,fp8,0,0.053674668073654175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,0,1,float16,float16,0,0.05573866764704386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,0,1,float16,fp8,0,0.055999999245007835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,128,1,float16,float16,0,0.05384533107280731
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,0,1,fp8,fp8,0,0.05300266544024149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,0,1,float16,float16,0,0.05397866666316986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,128,1,float16,fp8,0,0.05372266471385956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,128,1,fp8,fp8,0,0.04970666766166687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,0,1,float16,fp8,0,0.05407466491063436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,128,1,float16,float16,0,0.052629331747690834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,0,1,fp8,fp8,0,0.05028266708056132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,0,1,float16,float16,0,0.05525333185990652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,128,1,float16,fp8,0,0.05277333160241445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,128,1,fp8,fp8,0,0.05042133231957754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,0,1,float16,fp8,0,0.05436266462008158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,0,1,fp8,fp8,0,0.05193600058555603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,128,1,float16,float16,0,0.05407999952634176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,0,1,float16,float16,0,0.054144000013669334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,128,1,float16,fp8,0,0.053914666175842285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,128,1,fp8,fp8,0,0.050373335679372154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,0,1,float16,fp8,0,0.05398933092753092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,0,1,fp8,fp8,0,0.050714666644732155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,128,1,float16,float16,0,0.039274667700131737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,0,1,float16,float16,0,0.039488000174363456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,128,1,float16,fp8,0,0.03755733370780945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,128,1,fp8,fp8,0,0.0378506655494372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,0,1,float16,fp8,0,0.03982399900754293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,0,1,fp8,fp8,0,0.03755199909210205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,128,1,float16,float16,0,0.037632000943024956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,0,1,float16,float16,0,0.037615999579429626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,128,1,float16,fp8,0,0.03758399933576584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,128,1,fp8,fp8,0,0.03619733452796936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,0,1,float16,fp8,0,0.037685332198937736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,0,1,fp8,fp8,0,0.03595199932654699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,128,1,float16,float16,0,0.037776000797748566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,0,1,float16,float16,0,0.03759466608365377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,128,1,float16,fp8,0,0.03783999880154928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,128,1,fp8,fp8,0,0.03527999917666117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,0,1,float16,fp8,0,0.03827733298142751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,0,1,fp8,fp8,0,0.03581333408753077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,128,1,float16,float16,0,0.03751999884843826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,0,1,float16,float16,0,0.03754133234421412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,128,1,float16,fp8,0,0.037178667883078255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,128,1,fp8,fp8,0,0.035690667728583016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,0,1,float16,fp8,0,0.03751999884843826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,0,1,fp8,fp8,0,0.03569599986076355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,128,1,float16,float16,0,0.027621333797772724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,0,1,float16,float16,0,0.02935466667016347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,128,1,float16,fp8,0,0.0272533322374026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,128,1,fp8,fp8,0,0.0276053324341774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,0,1,float16,fp8,0,0.029071999092896778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,0,1,fp8,fp8,0,0.0276853342851003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,128,1,float16,float16,0,0.026015999416510265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,0,1,float16,float16,0,0.027424000203609467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,128,1,float16,fp8,0,0.02735466758410136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,128,1,fp8,fp8,0,0.025754667818546295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,0,1,float16,fp8,0,0.02738133321205775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,0,1,fp8,fp8,0,0.027317332724730175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,128,1,float16,float16,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,0,1,float16,float16,0,0.027215999861558277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,128,1,float16,fp8,0,0.027237333357334137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,128,1,fp8,fp8,0,0.02887466549873352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,0,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,0,1,fp8,fp8,0,0.025573333104451496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,128,1,float16,float16,0,0.0278613343834877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,0,1,float16,float16,0,0.02749866743882497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,128,1,float16,fp8,0,0.027119999130566914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,0,1,float16,fp8,0,0.027952000498771667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,128,1,fp8,fp8,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,0,1,fp8,fp8,0,0.027434666951497395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,128,1,float16,float16,0,0.023242667317390442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,0,1,float16,float16,0,0.023472001155217487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,128,1,float16,fp8,0,0.023503998915354412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,128,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,0,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,0,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,128,1,float16,float16,0,0.02332266668478648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,0,1,float16,float16,0,0.023333333432674408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,128,1,float16,fp8,0,0.02309333284695943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,128,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,0,1,float16,fp8,0,0.02306666721900304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,128,1,float16,float16,0,0.023354666928450268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,0,1,float16,float16,0,0.023717333873112995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,128,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,128,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,0,1,float16,fp8,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,0,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,128,1,float16,float16,0,0.02348800003528595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,128,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,128,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,0,1,float16,float16,0,0.02364266663789749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,0,1,float16,fp8,0,0.023711999257405598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,0,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,0,1,float16,float16,0,0.023056000471115112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,128,1,float16,float16,0,0.021882665654023487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,128,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,128,1,fp8,fp8,0,0.021370666722456615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,0,1,float16,fp8,0,0.02390933285156886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,0,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,128,1,float16,float16,0,0.022917332748572033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,0,1,float16,float16,0,0.023754666248957317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,128,1,float16,fp8,0,0.021701333423455555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,128,1,fp8,fp8,0,0.021674667795499165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,0,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,0,1,float16,fp8,0,0.02365333338578542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,128,1,float16,float16,0,0.021642667551835377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,0,1,float16,float16,0,0.02329600105683009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,128,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,128,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,0,1,float16,fp8,0,0.023525332411130268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,0,1,fp8,fp8,0,0.021568000316619873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,128,1,float16,float16,0,0.02347733328739802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,0,1,float16,float16,0,0.02233600119749705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,128,1,float16,fp8,0,0.021967999637126923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,128,1,fp8,fp8,0,0.022053333620230358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,0,1,fp8,fp8,0,0.023290666441122692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,0,1,float16,float16,0,0.27081600824991864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,128,1,float16,float16,0,0.2767146627108256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,128,1,float16,fp8,0,0.2773120005925496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,0,1,fp8,fp8,0,0.24710400899251303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,128,1,fp8,fp8,0,0.254586656888326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,0,1,float16,fp8,0,0.2696746587753296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,128,1,float16,float16,0,0.28488532702128094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,0,1,float16,float16,0,0.2775413393974304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,128,1,float16,fp8,0,0.2818293372790019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,128,1,fp8,fp8,0,0.26712532838185626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,0,1,float16,fp8,0,0.27677865823109943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,0,1,fp8,fp8,0,0.261407991250356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,128,1,float16,float16,0,0.2829280098279317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,0,1,float16,float16,0,0.2780533234278361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,128,1,float16,fp8,0,0.28196799755096436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,128,1,fp8,fp8,0,0.2702346642812093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,0,1,float16,fp8,0,0.2759573260943095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,0,1,fp8,fp8,0,0.2634720007578532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,128,1,float16,float16,0,0.1618826687335968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,0,1,float16,float16,0,0.15827199816703796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,128,1,float16,fp8,0,0.15818132956822714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,128,1,fp8,fp8,0,0.15589333573977152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,0,1,float16,fp8,0,0.1562933325767517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,0,1,fp8,fp8,0,0.15289066235224405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,128,1,float16,float16,0,0.14807466665903726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,0,1,float16,float16,0,0.14460800091425577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,128,1,float16,fp8,0,0.1467519998550415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,128,1,fp8,fp8,0,0.1359946628411611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,0,1,float16,fp8,0,0.14526933431625366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,0,1,fp8,fp8,0,0.1318826675415039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,128,1,float16,float16,0,0.14999999602635702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,0,1,float16,float16,0,0.1463093360265096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,128,1,float16,fp8,0,0.15026133259137472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,128,1,fp8,fp8,0,0.1411786675453186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,0,1,float16,fp8,0,0.14661332964897156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,0,1,fp8,fp8,0,0.13688533504803976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,128,1,float16,float16,0,0.15174933274586996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,0,1,float16,float16,0,0.14814933141072592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,128,1,fp8,fp8,0,0.14627733826637268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,128,1,float16,fp8,0,0.150325338045756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,0,1,float16,fp8,0,0.14854400356610617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,0,1,fp8,fp8,0,0.141077329715093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,128,1,float16,float16,0,0.0913866659005483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,0,1,float16,float16,0,0.08948266506195068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,128,1,float16,fp8,0,0.08941333492596944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,128,1,fp8,fp8,0,0.08898666501045227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,0,1,float16,fp8,0,0.08768533666928609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,128,1,float16,float16,0,0.08333866794904073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,0,1,float16,float16,0,0.08201600114504497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,0,1,fp8,fp8,0,0.08736532926559448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,128,1,float16,fp8,0,0.082997332016627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,128,1,fp8,fp8,0,0.07679466903209686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,0,1,fp8,fp8,0,0.07437333464622498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,0,1,float16,fp8,0,0.08081600069999695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,128,1,float16,float16,0,0.08323200047016144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,0,1,float16,float16,0,0.08288000027338664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,128,1,fp8,fp8,0,0.07643733421961467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,128,1,float16,fp8,0,0.08462933699289958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,0,1,float16,fp8,0,0.08120533327261607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,0,1,fp8,fp8,0,0.07454399764537811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,128,1,float16,float16,0,0.08463467160860698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,0,1,float16,float16,0,0.08273600041866302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,128,1,float16,fp8,0,0.08322133123874664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,128,1,fp8,fp8,0,0.07948266466458638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,0,1,float16,fp8,0,0.08311466872692108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,0,1,fp8,fp8,0,0.07913066446781158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,128,1,float16,float16,0,0.0513919989267985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,0,1,float16,float16,0,0.050581331054369606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,128,1,float16,fp8,0,0.05179200073083242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,128,1,fp8,fp8,0,0.050160000721613564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,0,1,float16,fp8,0,0.050293331344922386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,0,1,fp8,fp8,0,0.04773866633574168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,128,1,float16,float16,0,0.04981866478919983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,0,1,float16,float16,0,0.04773333172003428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,128,1,float16,fp8,0,0.050016000866889954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,128,1,fp8,fp8,0,0.045663997530937195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,0,1,float16,fp8,0,0.049360002080599465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,0,1,fp8,fp8,0,0.04586666822433472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,128,1,float16,float16,0,0.04975466430187225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,0,1,float16,float16,0,0.04956266780694326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,128,1,float16,fp8,0,0.05011733373006185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,128,1,fp8,fp8,0,0.04770666857560476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,0,1,float16,fp8,0,0.04972266654173533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,0,1,fp8,fp8,0,0.04549333453178406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,128,1,float16,float16,0,0.049738665421803795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,0,1,float16,float16,0,0.04822400212287903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,128,1,float16,fp8,0,0.049695998430252075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,128,1,fp8,fp8,0,0.0462773342927297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,0,1,float16,fp8,0,0.04784533381462097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,0,1,fp8,fp8,0,0.04584533472855886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,128,1,float16,float16,0,0.035391998787721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,0,1,float16,float16,0,0.03570133447647095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,128,1,fp8,fp8,0,0.03532266616821289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,128,1,float16,fp8,0,0.03754133234421412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,0,1,float16,fp8,0,0.035487999518712364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,128,1,float16,float16,0,0.0354666660229365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,0,1,fp8,fp8,0,0.033717334270477295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,0,1,float16,float16,0,0.03534399966398875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,128,1,float16,fp8,0,0.03552533437808355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,128,1,fp8,fp8,0,0.03366933266321818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,0,1,float16,fp8,0,0.035445332527160645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,0,1,fp8,fp8,0,0.03347733368476232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,0,1,float16,float16,0,0.033802665770053864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,128,1,float16,float16,0,0.035445332527160645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,128,1,float16,fp8,0,0.03576533248027166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,128,1,fp8,fp8,0,0.03346666693687439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,0,1,float16,fp8,0,0.034741332133611046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,0,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,128,1,float16,float16,0,0.03540800015131632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,0,1,float16,float16,0,0.03568000098069509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,128,1,float16,fp8,0,0.036661334335803986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,128,1,fp8,fp8,0,0.03456533451875051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,0,1,float16,fp8,0,0.03535466641187668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,0,1,fp8,fp8,0,0.03356799980004629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,128,1,float16,float16,0,0.025536000728607178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,0,1,float16,float16,0,0.025285333395004272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,128,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,128,1,fp8,fp8,0,0.025546667476495106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,0,1,float16,fp8,0,0.02584533393383026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,0,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,128,1,float16,float16,0,0.025333332518736523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,0,1,float16,float16,0,0.024959998826185863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,128,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,128,1,fp8,fp8,0,0.024447999894618988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,0,1,fp8,fp8,0,0.02306666721900304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,128,1,float16,float16,0,0.02532800038655599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,0,1,float16,float16,0,0.025642665723959606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,128,1,float16,fp8,0,0.025093334416548412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,128,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,0,1,float16,fp8,0,0.025472000241279602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,0,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,128,1,float16,float16,0,0.025381334125995636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,0,1,float16,float16,0,0.025487999121348064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,128,1,float16,fp8,0,0.025637333591779072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,128,1,fp8,fp8,0,0.02548266698916753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,0,1,float16,fp8,0,0.025498665869235992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,0,1,fp8,fp8,0,0.02515200028816859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,128,1,float16,float16,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,128,1,float16,fp8,0,0.02143999934196472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,128,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,0,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,128,1,float16,float16,0,0.021562665700912476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,0,1,float16,float16,0,0.021536000072956085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,128,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,128,1,fp8,fp8,0,0.021189334491888683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,0,1,float16,fp8,0,0.021429332594076794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,128,1,float16,float16,0,0.02146666745344798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,0,1,float16,float16,0,0.021402666966120403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,128,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,128,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,0,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,0,1,fp8,fp8,0,0.019487999379634857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,128,1,float16,float16,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,128,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,128,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,0,1,float16,fp8,0,0.020986666282018025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,0,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,0,1,float16,float16,0,0.019648000597953796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,128,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,0,1,float16,fp8,0,0.01956266661485036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,0,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,128,1,float16,float16,0,0.0210506667693456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,0,1,float16,float16,0,0.021359999974568684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,128,1,fp8,fp8,0,0.019930666933457058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,0,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,128,1,float16,float16,0,0.02144533395767212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,0,1,float16,float16,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,128,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,128,1,float16,fp8,0,0.021664001047611237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,0,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,0,1,float16,float16,0,0.019066666563351948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,128,1,float16,float16,0,0.021701333423455555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,128,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,128,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,0,1,float16,fp8,0,0.02164799968401591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,128,1,float16,float16,0,0.01923199991385142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,128,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,128,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,0,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,0,1,fp8,fp8,0,0.018863999595244724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,128,1,float16,float16,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,0,1,float16,float16,0,0.019343999524911244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,128,1,float16,fp8,0,0.019727999965349834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,128,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,0,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,0,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,128,1,float16,float16,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,0,1,float16,float16,0,0.019167999426523846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,128,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,0,1,float16,fp8,0,0.0199946661790212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,0,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,128,1,float16,float16,0,0.020421333611011505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,128,1,float16,fp8,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,128,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,0,1,float16,fp8,0,0.01942933350801468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,0,1,fp8,fp8,0,0.019567999988794327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,128,1,float16,float16,0,0.15426133076349893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,0,1,float16,float16,0,0.1537813345591227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,128,1,float16,fp8,0,0.15311466654141745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,128,1,fp8,fp8,0,0.143696000178655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,0,1,float16,fp8,0,0.15386666854222616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,0,1,fp8,fp8,0,0.14215999841690063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,128,1,float16,float16,0,0.15602133671442667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,0,1,float16,float16,0,0.15665066242218018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,128,1,float16,fp8,0,0.15493333339691162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,128,1,fp8,fp8,0,0.14841066797574362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,0,1,float16,fp8,0,0.15636266271273294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,0,1,fp8,fp8,0,0.14878400166829428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,128,1,float16,float16,0,0.15633066495259604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,128,1,float16,fp8,0,0.1555466651916504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,0,1,float16,float16,0,0.1578933298587799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,128,1,fp8,fp8,0,0.15281066298484802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,0,1,float16,fp8,0,0.15677866339683533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,0,1,fp8,fp8,0,0.152730663617452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,128,1,float16,float16,0,0.09293333689371745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,0,1,float16,float16,0,0.09288533528645833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,128,1,fp8,fp8,0,0.09297066926956177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,128,1,float16,fp8,0,0.09136533737182617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,0,1,float16,fp8,0,0.09102400143941243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,0,1,fp8,fp8,0,0.09259733557701111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,128,1,float16,float16,0,0.08531733353932698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,0,1,float16,float16,0,0.08493866523106892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,128,1,float16,fp8,0,0.08500267068545024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,128,1,fp8,fp8,0,0.07892266909281413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,0,1,float16,fp8,0,0.08526933193206787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,0,1,fp8,fp8,0,0.07855466504891713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,0,1,float16,float16,0,0.08635200063387553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,128,1,float16,fp8,0,0.08621866504351298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,128,1,float16,float16,0,0.08560533324877422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,128,1,fp8,fp8,0,0.08029333253701527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,0,1,float16,fp8,0,0.0863146682580312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,0,1,fp8,fp8,0,0.08036266764005025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,0,1,float16,float16,0,0.08674133817354839
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,128,1,float16,float16,0,0.08667733271916707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,128,1,float16,fp8,0,0.08687999844551086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,128,1,fp8,fp8,0,0.08301333089669545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,0,1,float16,fp8,0,0.0867199997107188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,128,1,float16,float16,0,0.05398400127887726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,0,1,fp8,fp8,0,0.08475200335184734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,0,1,float16,float16,0,0.05315199991067251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,128,1,float16,fp8,0,0.052527998884518944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,128,1,fp8,fp8,0,0.050570666790008545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,0,1,fp8,fp8,0,0.051354666550954185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,0,1,float16,fp8,0,0.05446400245030721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,128,1,float16,float16,0,0.05064000189304352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,0,1,float16,float16,0,0.04985066751639048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,128,1,float16,fp8,0,0.05190933247407278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,0,1,float16,fp8,0,0.050069332122802734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,128,1,fp8,fp8,0,0.04896533489227295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,0,1,fp8,fp8,0,0.048714667558670044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,128,1,float16,float16,0,0.05202666421731313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,0,1,float16,float16,0,0.05080533524354299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,128,1,float16,fp8,0,0.0524479995171229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,128,1,fp8,fp8,0,0.04960533479849497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,0,1,float16,fp8,0,0.05194133520126343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,0,1,fp8,fp8,0,0.04916800061861674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,0,1,float16,float16,0,0.05207466582457224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,128,1,float16,float16,0,0.052330667773882546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,128,1,float16,fp8,0,0.05186666548252106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,128,1,fp8,fp8,0,0.04981866478919983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,0,1,float16,fp8,0,0.05198933184146881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,128,1,float16,float16,0,0.03445333242416382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,0,1,fp8,fp8,0,0.04785066843032837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,0,1,float16,float16,0,0.03389333436886469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,128,1,float16,fp8,0,0.035029334326585136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,128,1,fp8,fp8,0,0.03356266766786575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,0,1,float16,fp8,0,0.033930666744709015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,0,1,fp8,fp8,0,0.03347733368476232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,128,1,float16,float16,0,0.03344533344109853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,0,1,float16,float16,0,0.03347733368476232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,128,1,float16,fp8,0,0.032602667808532715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,128,1,fp8,fp8,0,0.033370666205883026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,0,1,float16,fp8,0,0.03398933261632919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,0,1,fp8,fp8,0,0.031530665854612984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,128,1,float16,float16,0,0.03365866591533025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,0,1,float16,float16,0,0.033520000676314034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,128,1,float16,fp8,0,0.03341866781314214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,128,1,fp8,fp8,0,0.03180266668399175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,0,1,float16,fp8,0,0.03356266766786575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,0,1,fp8,fp8,0,0.03342399994532267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,128,1,float16,float16,0,0.03422933320204417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,0,1,float16,float16,0,0.034048000971476235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,128,1,float16,fp8,0,0.03401066611210505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,128,1,fp8,fp8,0,0.0331839993596077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,0,1,float16,fp8,0,0.03440533330043157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,0,1,fp8,fp8,0,0.03145066648721695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,128,1,float16,float16,0,0.027290667096773785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,0,1,float16,float16,0,0.02749866743882497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,128,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,128,1,fp8,fp8,0,0.027434666951497395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,0,1,float16,fp8,0,0.02752000093460083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,0,1,fp8,fp8,0,0.027269333600997925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,128,1,float16,float16,0,0.025797332326571148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,0,1,float16,float16,0,0.025392000873883564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,128,1,float16,fp8,0,0.026159999271233875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,128,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,0,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,0,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,128,1,float16,float16,0,0.025311999022960663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,0,1,float16,float16,0,0.027130665878454845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,128,1,float16,fp8,0,0.02585600068171819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,128,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,0,1,float16,fp8,0,0.02752533306678136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,0,1,fp8,fp8,0,0.025199999411900837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,128,1,float16,float16,0,0.0273333340883255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,0,1,float16,float16,0,0.025290665527184803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,128,1,float16,fp8,0,0.025237334271272022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,128,1,fp8,fp8,0,0.02738133321205775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,0,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,0,1,fp8,fp8,0,0.02548266698916753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,128,1,float16,float16,0,0.019493332753578823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,0,1,float16,float16,0,0.018725333114465077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,128,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,128,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,0,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,128,1,float16,float16,0,0.01868266612291336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,0,1,float16,float16,0,0.019466667125622433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,128,1,float16,fp8,0,0.01889066646496455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,128,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,0,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,128,1,float16,float16,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,0,1,float16,float16,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,128,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,128,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,0,1,float16,fp8,0,0.01952533299724261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,128,1,float16,float16,0,0.01863466699918111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,0,1,float16,float16,0,0.018218666315078735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,128,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,128,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,0,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,128,1,float16,float16,0,0.017231999586025875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,0,1,float16,float16,0,0.01759999990463257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,128,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,128,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,0,1,float16,fp8,0,0.017653333644072216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,128,1,float16,float16,0,0.017632000148296356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,128,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,0,1,float16,float16,0,0.01759999990463257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,128,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,0,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,128,1,float16,float16,0,0.017583999782800674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,128,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,128,1,fp8,fp8,0,0.017450666675964992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,0,1,float16,float16,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,128,1,float16,float16,0,0.017690667261679966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,128,1,float16,fp8,0,0.01791999985774358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,128,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,0,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,128,1,float16,float16,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,0,1,float16,float16,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,128,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,128,1,float16,fp8,0,0.01756799966096878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,128,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,128,1,float16,float16,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,128,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,128,1,fp8,fp8,0,0.01584533353646596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,128,1,float16,float16,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,0,1,float16,float16,0,0.017450666675964992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,128,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,0,1,float16,float16,0,0.017557332913080852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,128,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,128,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,0,1,float16,float16,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,128,1,float16,float16,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,0,1,float16,float16,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,128,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,0,1,float16,float16,0,0.017477333545684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,128,1,float16,fp8,0,0.0174346665541331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,0,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,128,1,float16,float16,0,0.11045333743095398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,0,1,float16,float16,0,0.11168533563613892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,128,1,float16,fp8,0,0.11125333110491435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,128,1,fp8,fp8,0,0.10143466790517171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,0,1,fp8,fp8,0,0.10275733470916748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,0,1,float16,fp8,0,0.11131200194358826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,128,1,float16,float16,0,0.11167466640472412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,0,1,float16,float16,0,0.11079466342926025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,128,1,float16,fp8,0,0.11180800199508667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,128,1,fp8,fp8,0,0.1037066678206126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,0,1,float16,fp8,0,0.11104533076286316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,0,1,fp8,fp8,0,0.10467200477917989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,128,1,float16,float16,0,0.11180266737937927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,0,1,float16,float16,0,0.11190932989120483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,128,1,float16,fp8,0,0.11166399717330933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,128,1,fp8,fp8,0,0.10560533404350281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,0,1,float16,fp8,0,0.11191999912261963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,0,1,fp8,fp8,0,0.10558399558067322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,128,1,float16,float16,0,0.06442133088906606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,0,1,float16,float16,0,0.06477333108584087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,128,1,float16,fp8,0,0.06628266473611195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,128,1,fp8,fp8,0,0.0621919979651769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,0,1,float16,fp8,0,0.06458133459091187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,0,1,fp8,fp8,0,0.06215466558933258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,128,1,float16,float16,0,0.0641599992911021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,128,1,float16,fp8,0,0.06277333199977875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,128,1,fp8,fp8,0,0.05994133154551188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,0,1,float16,float16,0,0.06321600079536438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,0,1,float16,fp8,0,0.06436266501744588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,0,1,fp8,fp8,0,0.059263999263445534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,128,1,float16,float16,0,0.06368533273537953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,0,1,float16,float16,0,0.06491200129191081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,128,1,float16,fp8,0,0.0629066675901413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,128,1,fp8,fp8,0,0.06027733286221822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,0,1,float16,fp8,0,0.06346666812896729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,128,1,float16,float16,0,0.0636053333679835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,0,1,fp8,fp8,0,0.060533334811528526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,0,1,float16,float16,0,0.06426666676998138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,128,1,float16,fp8,0,0.06442133088906606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,128,1,fp8,fp8,0,0.05987200140953064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,128,1,float16,float16,0,0.04186133543650309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,0,1,fp8,fp8,0,0.059994667768478394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,0,1,float16,fp8,0,0.06469333171844482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,0,1,float16,float16,0,0.04130133241415024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,128,1,float16,fp8,0,0.041850666205088295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,128,1,fp8,fp8,0,0.03965866565704346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,0,1,float16,fp8,0,0.042064001162846885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,0,1,fp8,fp8,0,0.03986666599909464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,128,1,float16,float16,0,0.03988266736268997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,0,1,float16,float16,0,0.04179200033346812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,128,1,float16,fp8,0,0.040063999593257904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,128,1,fp8,fp8,0,0.03926933308442434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,0,1,float16,fp8,0,0.041519999504089355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,0,1,fp8,fp8,0,0.03956799954175949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,128,1,float16,float16,0,0.04115733255942663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,0,1,float16,float16,0,0.04185600082079569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,128,1,float16,fp8,0,0.041482667128245033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,128,1,fp8,fp8,0,0.039834665755430855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,0,1,float16,fp8,0,0.04159466673930486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,0,1,fp8,fp8,0,0.039893334110577904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,128,1,float16,float16,0,0.04165333261092504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,0,1,float16,float16,0,0.04177600145339966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,128,1,float16,fp8,0,0.04020266731580099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,0,1,float16,fp8,0,0.04066666712363561
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,128,1,fp8,fp8,0,0.03956799954175949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,0,1,fp8,fp8,0,0.03836799909671148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,128,1,float16,float16,0,0.02902399996916453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,0,1,float16,float16,0,0.027461332579453785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,128,1,float16,fp8,0,0.02942399928967158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,128,1,fp8,fp8,0,0.02779199928045273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,0,1,float16,fp8,0,0.02924266705910365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,0,1,fp8,fp8,0,0.029232000311215717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,128,1,float16,float16,0,0.027424000203609467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,128,1,float16,fp8,0,0.02773333340883255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,128,1,fp8,fp8,0,0.027552001178264618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,0,1,float16,fp8,0,0.029418667157491047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,0,1,fp8,fp8,0,0.027552001178264618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,128,1,float16,float16,0,0.02937600016593933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,0,1,float16,float16,0,0.029578665892283123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,128,1,float16,fp8,0,0.0273333340883255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,0,1,float16,fp8,0,0.029296000798543293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,128,1,fp8,fp8,0,0.02757333219051361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,0,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,128,1,float16,float16,0,0.02920000006755193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,0,1,float16,float16,0,0.02938666691382726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,128,1,float16,fp8,0,0.029205332199732464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,128,1,fp8,fp8,0,0.027488000690937042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,0,1,float16,fp8,0,0.02757866680622101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,0,1,fp8,fp8,0,0.029045333464940388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,128,1,float16,float16,0,0.02125866711139679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,0,1,float16,float16,0,0.02142400046189626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,128,1,float16,fp8,0,0.021520001192887623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,128,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,0,1,fp8,fp8,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,128,1,float16,float16,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,0,1,float16,float16,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,128,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,128,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,0,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,0,1,fp8,fp8,0,0.020245333512624104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,128,1,float16,float16,0,0.020997333029905956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,0,1,float16,float16,0,0.021381333470344543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,128,1,float16,fp8,0,0.021290667355060577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,128,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,0,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,0,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,128,1,float16,float16,0,0.02125866711139679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,128,1,float16,fp8,0,0.021253332495689392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,128,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,0,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,0,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,128,1,float16,float16,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,128,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,128,1,fp8,fp8,0,0.018170667191346485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,0,1,float16,fp8,0,0.01874133323629697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,128,1,float16,float16,0,0.017488000293572743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,128,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,128,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,0,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,0,1,fp8,fp8,0,0.017504000415404636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,128,1,float16,float16,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,0,1,fp8,fp8,0,0.017488000293572743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,128,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,128,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,0,1,float16,fp8,0,0.018789333601792652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,128,1,float16,float16,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,0,1,float16,float16,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,128,1,fp8,fp8,0,0.01584533353646596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,128,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,128,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,128,1,float16,float16,0,0.017445333302021027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,128,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,128,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,0,1,float16,float16,0,0.016016000260909397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,128,1,float16,float16,0,0.016122666498025257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,128,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,128,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,0,1,fp8,fp8,0,0.015610666324694952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,128,1,float16,float16,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,128,1,float16,fp8,0,0.017658667018016178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,0,1,float16,float16,0,0.017509333789348602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,128,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,128,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,128,1,float16,float16,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,0,1,float16,float16,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,0,1,float16,fp8,0,0.017583999782800674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,128,1,float16,float16,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,0,1,float16,float16,0,0.01590399940808614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,128,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,128,1,float16,float16,0,0.017557332913080852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,0,1,float16,fp8,0,0.017445333302021027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,128,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,128,1,fp8,fp8,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,0,1,float16,fp8,0,0.01657066618402799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,128,1,float16,float16,0,0.015615999698638916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,128,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,128,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,0,1,float16,fp8,0,0.016704000532627106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,128,1,float16,float16,0,0.01544533297419548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,128,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,128,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,0,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,128,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,128,1,fp8,fp8,0,0.016149333367745083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,0,1,float16,fp8,0,0.01766933376590411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,128,1,float16,float16,0,0.08916266759236653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,0,1,float16,float16,0,0.0906826655069987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,128,1,float16,fp8,0,0.08913066983222961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,128,1,fp8,fp8,0,0.08298133313655853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,0,1,float16,fp8,0,0.08911466598510742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,0,1,fp8,fp8,0,0.08304533362388611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,128,1,float16,float16,0,0.09046933054924011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,0,1,float16,float16,0,0.08940800031026204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,128,1,float16,fp8,0,0.08909866213798523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,128,1,fp8,fp8,0,0.0831520011027654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,0,1,float16,fp8,0,0.08935466408729553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,0,1,fp8,fp8,0,0.08292800188064575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,128,1,float16,float16,0,0.09077866872151692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,0,1,float16,float16,0,0.08985599875450134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,128,1,float16,fp8,0,0.08914666374524434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,128,1,fp8,fp8,0,0.08281066517035167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,0,1,float16,fp8,0,0.09106133381525676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,0,1,fp8,fp8,0,0.08272000153859456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,128,1,float16,float16,0,0.05398400127887726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,0,1,float16,float16,0,0.05487466851870219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,128,1,float16,fp8,0,0.05398400127887726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,128,1,fp8,fp8,0,0.052005335688591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,0,1,float16,fp8,0,0.05431999762852987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,0,1,fp8,fp8,0,0.051776001850763954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,128,1,float16,float16,0,0.05392533540725708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,128,1,float16,fp8,0,0.053082664807637535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,0,1,float16,float16,0,0.05420266588528951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,128,1,fp8,fp8,0,0.049679999550183616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,0,1,float16,fp8,0,0.05207466582457224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,0,1,fp8,fp8,0,0.05091733237107595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,0,1,float16,float16,0,0.052389333645502724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,128,1,float16,fp8,0,0.053301334381103516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,128,1,float16,float16,0,0.05483733117580414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,128,1,fp8,fp8,0,0.05089599887530009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,0,1,float16,fp8,0,0.05410666763782501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,0,1,fp8,fp8,0,0.04997866849104563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,0,1,float16,float16,0,0.05440000196297964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,128,1,float16,float16,0,0.053183997670809426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,128,1,float16,fp8,0,0.05434666574001312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,128,1,fp8,fp8,0,0.05000533163547516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,0,1,float16,fp8,0,0.05409599840641022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,0,1,fp8,fp8,0,0.05041066805521647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,128,1,float16,float16,0,0.035674666364987694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,0,1,float16,float16,0,0.03571200122435888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,128,1,float16,fp8,0,0.035760000348091125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,128,1,fp8,fp8,0,0.035589332381884255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,0,1,float16,fp8,0,0.03682133307059606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,0,1,fp8,fp8,0,0.035061334570248924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,128,1,float16,float16,0,0.035173334181308746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,0,1,float16,float16,0,0.035349334279696144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,128,1,float16,fp8,0,0.035786665976047516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,128,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,0,1,float16,fp8,0,0.035429333647092186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,0,1,fp8,fp8,0,0.033728001018365227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,128,1,float16,float16,0,0.035317334036032356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,0,1,float16,float16,0,0.03549333413441976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,128,1,float16,fp8,0,0.03555200000603994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,128,1,fp8,fp8,0,0.033359999457995095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,0,1,float16,fp8,0,0.035717333356539406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,0,1,fp8,fp8,0,0.03357866654793421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,128,1,float16,float16,0,0.03572800010442734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,0,1,float16,float16,0,0.035743998984495796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,128,1,float16,fp8,0,0.035775999228159584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,128,1,fp8,fp8,0,0.03538133452335993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,0,1,float16,fp8,0,0.03606399893760681
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,0,1,fp8,fp8,0,0.03385599950949351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,128,1,float16,float16,0,0.023413332800070446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,0,1,float16,float16,0,0.023391999304294586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,128,1,float16,fp8,0,0.02510933329661687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,128,1,fp8,fp8,0,0.023445333043734234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,0,1,float16,fp8,0,0.023647998770078022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,0,1,fp8,fp8,0,0.023445333043734234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,128,1,float16,float16,0,0.023178666830062866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,0,1,float16,float16,0,0.023018665611743927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,128,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,128,1,fp8,fp8,0,0.02149333308140437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,0,1,float16,fp8,0,0.023391999304294586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,0,1,fp8,fp8,0,0.023029332359631855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,128,1,float16,float16,0,0.023498666783173878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,0,1,float16,float16,0,0.023370665808518726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,128,1,float16,fp8,0,0.024101334313551586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,128,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,0,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,0,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,128,1,float16,float16,0,0.023168000082174938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,0,1,float16,float16,0,0.02348266790310542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,128,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,128,1,fp8,fp8,0,0.022965334355831146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,0,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,0,1,fp8,fp8,0,0.02348266790310542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,128,1,float16,float16,0,0.01926933353145917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,0,1,float16,float16,0,0.01960533360640208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,128,1,float16,fp8,0,0.02035733312368393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,128,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,0,1,float16,fp8,0,0.019839999576409657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,0,1,fp8,fp8,0,0.0195573332409064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,128,1,float16,float16,0,0.020784000555674236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,0,1,float16,float16,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,128,1,float16,fp8,0,0.019274666905403137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,128,1,fp8,fp8,0,0.019445333629846573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,0,1,float16,fp8,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,0,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,128,1,float16,float16,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,0,1,float16,float16,0,0.019413333386182785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,128,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,128,1,fp8,fp8,0,0.01945066700379054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,0,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,0,1,float16,float16,0,0.01969066634774208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,128,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,128,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,0,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,0,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,128,1,fp8,fp8,0,0.015498666713635126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,0,1,fp8,fp8,0,0.017509333789348602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,128,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,128,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,128,1,float16,float16,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,0,1,float16,float16,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,128,1,fp8,fp8,0,0.015840000162522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,0,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,128,1,fp8,fp8,0,0.015967999895413715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,128,1,float16,float16,0,0.014959999670584997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,128,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,0,1,float16,fp8,0,0.015882667154073715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,128,1,float16,float16,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,0,1,float16,float16,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,128,1,fp8,fp8,0,0.01748266691962878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,128,1,float16,float16,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,0,1,float16,float16,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,128,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,128,1,float16,float16,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,128,1,float16,fp8,0,0.017477333545684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,0,1,fp8,fp8,0,0.015599999576807022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,128,1,float16,float16,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,128,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,128,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,0,1,float16,fp8,0,0.015392000476519266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,128,1,float16,float16,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,128,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,0,1,float16,fp8,0,0.018458666900793713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,128,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,0,1,float16,float16,0,0.015413332730531693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,128,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,128,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,128,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,128,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,128,1,float16,float16,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,0,1,float16,float16,0,0.015370666980743408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,128,1,fp8,fp8,0,0.015583999454975128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,0,1,float16,float16,0,0.015413332730531693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,128,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,0,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,128,1,float16,float16,0,0.07859733204046886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,128,1,float16,fp8,0,0.07889066636562347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,0,1,float16,float16,0,0.07938666641712189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,128,1,fp8,fp8,0,0.07425066828727722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,0,1,float16,fp8,0,0.07869866490364075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,0,1,fp8,fp8,0,0.07445866862932841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,128,1,float16,float16,0,0.08019199967384338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,0,1,float16,float16,0,0.08063466846942902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,128,1,float16,fp8,0,0.07947733501593272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,128,1,fp8,fp8,0,0.07484800120194753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,0,1,float16,fp8,0,0.07999999821186066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,0,1,fp8,fp8,0,0.07487999896208446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,128,1,float16,float16,0,0.0798826664686203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,0,1,float16,float16,0,0.07852266728878021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,128,1,float16,fp8,0,0.07886933286984761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,128,1,fp8,fp8,0,0.07433066765467326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,0,1,float16,fp8,0,0.07892799874146779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,0,1,fp8,fp8,0,0.07392000158627827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,128,1,float16,float16,0,0.048021331429481506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,0,1,float16,float16,0,0.047930667797724404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,128,1,float16,fp8,0,0.049957334995269775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,0,1,float16,fp8,0,0.04798933366934458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,0,1,fp8,fp8,0,0.04577599962552389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,128,1,float16,float16,0,0.04772266745567322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,128,1,fp8,fp8,0,0.0480320006608963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,0,1,float16,float16,0,0.04799466828505198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,128,1,float16,fp8,0,0.04776533444722494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,128,1,fp8,fp8,0,0.04718933502833048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,0,1,float16,fp8,0,0.048357332746187844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,0,1,fp8,fp8,0,0.0462719996770223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,128,1,float16,float16,0,0.047872001926104225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,0,1,float16,float16,0,0.04808000226815542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,0,1,float16,fp8,0,0.04778666794300079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,128,1,fp8,fp8,0,0.0460746685663859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,128,1,float16,fp8,0,0.04886400202910105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,0,1,fp8,fp8,0,0.04576000074545542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,128,1,float16,float16,0,0.048058668772379555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,0,1,float16,float16,0,0.048058668772379555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,128,1,float16,fp8,0,0.04775999983151754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,128,1,fp8,fp8,0,0.04609066744645437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,0,1,float16,fp8,0,0.04794133206208547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,128,1,float16,float16,0,0.03137599925200144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,0,1,fp8,fp8,0,0.04570133487383524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,0,1,float16,float16,0,0.031727999448776245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,128,1,float16,fp8,0,0.03169599920511246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,128,1,fp8,fp8,0,0.029317334294319153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,0,1,float16,fp8,0,0.0317546675602595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,0,1,fp8,fp8,0,0.03047466774781545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,128,1,float16,float16,0,0.029370665550231934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,0,1,float16,float16,0,0.03176533430814743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,128,1,float16,fp8,0,0.031557333966096245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,128,1,fp8,fp8,0,0.02958400050799052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,0,1,float16,fp8,0,0.0316746657093366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,0,1,fp8,fp8,0,0.02942933390537898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,128,1,float16,float16,0,0.031248000760873158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,0,1,float16,float16,0,0.030159999926884968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,128,1,float16,fp8,0,0.030586667358875275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,128,1,fp8,fp8,0,0.029450667401154835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,0,1,float16,fp8,0,0.031130666534105938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,0,1,fp8,fp8,0,0.029663999875386555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,128,1,float16,float16,0,0.029274667302767437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,0,1,float16,float16,0,0.03173866619666418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,128,1,float16,fp8,0,0.031194667021433514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,128,1,fp8,fp8,0,0.02962133288383484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,0,1,float16,fp8,0,0.03173333406448364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,128,1,float16,float16,0,0.023381332556406658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,0,1,fp8,fp8,0,0.030063999195893604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,0,1,float16,float16,0,0.02362666775782903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,128,1,float16,fp8,0,0.02330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,128,1,fp8,fp8,0,0.023103999594847362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,0,1,float16,fp8,0,0.023503998915354412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,0,1,fp8,fp8,0,0.02319466571013133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,128,1,float16,float16,0,0.023290666441122692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,0,1,float16,float16,0,0.021562665700912476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,128,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,128,1,fp8,fp8,0,0.023071999351183575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,0,1,float16,fp8,0,0.021583999196688335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,128,1,float16,float16,0,0.021429332594076794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,0,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,0,1,float16,float16,0,0.021685334543387096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,128,1,float16,fp8,0,0.021557333568731945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,128,1,fp8,fp8,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,0,1,float16,fp8,0,0.02142400046189626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,0,1,fp8,fp8,0,0.022634667654832203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,128,1,float16,float16,0,0.021168000996112823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,128,1,float16,fp8,0,0.023525332411130268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,0,1,float16,float16,0,0.02332266668478648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,128,1,fp8,fp8,0,0.022848000129063923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,0,1,float16,fp8,0,0.021957332889238994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,0,1,fp8,fp8,0,0.02327999969323476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,128,1,float16,float16,0,0.019354666272799175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,0,1,float16,float16,0,0.018885333091020584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,128,1,float16,fp8,0,0.019600000232458115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,128,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,0,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,128,1,float16,float16,0,0.021061333517233532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,0,1,float16,float16,0,0.018901333212852478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,128,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,128,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,0,1,fp8,fp8,0,0.019343999524911244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,128,1,float16,float16,0,0.01889066646496455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,0,1,float16,float16,0,0.01899733394384384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,128,1,fp8,fp8,0,0.01972266659140587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,128,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,0,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,128,1,float16,float16,0,0.01979200045267741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,0,1,float16,float16,0,0.019098666807015736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,128,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,128,1,fp8,fp8,0,0.019519999623298645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,0,1,float16,fp8,0,0.01887999971707662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,0,1,fp8,fp8,0,0.018810667097568512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,128,1,float16,float16,0,0.015301333119471868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,128,1,float16,fp8,0,0.016735999534527462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,128,1,fp8,fp8,0,0.0163680004576842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,0,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,0,1,float16,float16,0,0.015728000551462173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,0,1,float16,float16,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,128,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,128,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,128,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,128,1,float16,float16,0,0.01551466683546702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,128,1,float16,float16,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,0,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,128,1,float16,float16,0,0.016773333152135212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,128,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,128,1,float16,float16,0,0.015568000574906668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,128,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,128,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,0,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,128,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,0,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,128,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,128,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,0,1,float16,float16,0,0.01651200031240781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,128,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,0,1,float16,float16,0,0.016085332880417507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,0,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,128,1,float16,float16,0,0.015061333775520325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,0,1,float16,float16,0,0.015423999478419622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,128,1,float16,fp8,0,0.015386667102575302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,128,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,128,1,float16,float16,0,0.01543466622630755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,0,1,float16,float16,0,0.016735999534527462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,128,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,128,1,float16,float16,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,64,128,1,float16,float16,0,0.06825600067774455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,64,0,1,float16,float16,0,0.06838933130105336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,64,128,1,float16,fp8,0,0.0684799998998642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,64,128,1,fp8,fp8,0,0.06459199885527293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,64,0,1,float16,fp8,0,0.06871999800205231
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,64,0,1,fp8,fp8,0,0.06451733410358429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,64,0,1,float16,float16,0,0.06841599941253662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,64,128,1,float16,float16,0,0.06889600058396657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,64,128,1,float16,fp8,0,0.06856533388296764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,64,0,1,float16,fp8,0,0.0689279983441035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,64,128,1,fp8,fp8,0,0.06400000055631001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,64,0,1,fp8,fp8,0,0.06433600187301636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,64,128,1,float16,float16,0,0.06910933554172516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,64,0,1,float16,float16,0,0.0687306672334671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,64,128,1,float16,fp8,0,0.07038400073846181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,64,128,1,fp8,fp8,0,0.06425066788991292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,64,0,1,float16,fp8,0,0.06844800213972728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,64,0,1,fp8,fp8,0,0.06398933132489522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,64,128,1,float16,float16,0,0.04271466533342997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,0,0.04162133236726125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,64,128,1,float16,fp8,0,0.04155199974775314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,64,128,1,fp8,fp8,0,0.03933866570393244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,0,0.04364799956480662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,64,0,1,fp8,fp8,0,0.03986666599909464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,64,128,1,float16,float16,0,0.0420959989229838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,64,128,1,float16,fp8,0,0.041589332123597465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,64,0,1,float16,float16,0,0.04229333500067393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,64,128,1,fp8,fp8,0,0.039749334255854286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,64,0,1,float16,fp8,0,0.04387733340263367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,64,128,1,float16,float16,0,0.04182933270931244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,64,0,1,fp8,fp8,0,0.040149333576361336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,64,0,1,float16,float16,0,0.04192000130812327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,64,128,1,float16,fp8,0,0.041834667325019836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,64,128,1,fp8,fp8,0,0.04004266609748205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,64,0,1,float16,fp8,0,0.04370133578777313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,64,0,1,fp8,fp8,0,0.039450667798519135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,64,128,1,float16,float16,0,0.042266666889190674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,64,0,1,float16,float16,0,0.041984001795450844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,64,128,1,float16,fp8,0,0.04201066493988037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,64,128,1,fp8,fp8,0,0.03975466638803482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,64,0,1,float16,fp8,0,0.04178133110205332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,64,0,1,fp8,fp8,0,0.03979199876387914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,64,128,1,float16,float16,0,0.029813334345817566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,0,0.029296000798543293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,64,128,1,float16,fp8,0,0.029648000995318096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,64,128,1,fp8,fp8,0,0.027813332776228588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,0,0.029663999875386555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,64,0,1,fp8,fp8,0,0.027221334477265675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,64,128,1,float16,float16,0,0.02937600016593933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,64,0,1,float16,float16,0,0.02959466725587845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,64,128,1,float16,fp8,0,0.029535998900731403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,64,128,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,64,0,1,float16,fp8,0,0.029440000653266907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,64,0,1,fp8,fp8,0,0.027119999130566914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,64,128,1,float16,float16,0,0.02924266705910365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,64,0,1,float16,float16,0,0.029418667157491047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,64,128,1,float16,fp8,0,0.029637334247430164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,64,128,1,fp8,fp8,0,0.027552001178264618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,64,0,1,float16,fp8,0,0.029189333319664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,64,0,1,fp8,fp8,0,0.027386667827765148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,64,128,1,float16,float16,0,0.029653333127498627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,64,0,1,float16,float16,0,0.02938666691382726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,64,128,1,float16,fp8,0,0.02957333376010259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,64,128,1,fp8,fp8,0,0.028010666370391846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,64,0,1,float16,fp8,0,0.029626667499542236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,64,0,1,fp8,fp8,0,0.02755733331044515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,64,128,1,float16,float16,0,0.021488000949223835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,0,0.02293866624434789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,64,128,1,float16,fp8,0,0.021509334444999695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,64,128,1,fp8,fp8,0,0.02149333308140437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,0,0.023242667317390442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,64,0,1,fp8,fp8,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,64,128,1,float16,float16,0,0.021503999829292297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,64,0,1,float16,float16,0,0.02306666721900304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,64,128,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,64,128,1,fp8,fp8,0,0.020954666038354237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,64,0,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,64,0,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,64,128,1,float16,float16,0,0.021488000949223835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,64,0,1,float16,float16,0,0.021221332252025604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,64,128,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,64,128,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,64,0,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,64,0,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,64,128,1,float16,float16,0,0.0223786657055219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,64,0,1,float16,float16,0,0.021509334444999695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,64,128,1,float16,fp8,0,0.022143999735514324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,64,128,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,64,0,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,64,0,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,64,128,1,float16,float16,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,0,0.01893866683046023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,64,128,1,float16,fp8,0,0.02081599955757459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,64,128,1,fp8,fp8,0,0.018805333723624546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,64,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,64,128,1,float16,float16,0,0.019551999866962433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,64,0,1,float16,float16,0,0.018965333700180054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,64,128,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,64,128,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,64,0,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,64,128,1,float16,float16,0,0.01939733326435089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,64,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,64,128,1,float16,fp8,0,0.021040000021457672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,64,128,1,fp8,fp8,0,0.01937599976857503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,64,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,64,0,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,64,128,1,float16,float16,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,64,0,1,float16,float16,0,0.019226666539907455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,64,128,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,64,128,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,64,0,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,64,128,1,fp8,fp8,0,0.015541333705186844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,64,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,64,128,1,float16,float16,0,0.015247999380032221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,64,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,64,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,64,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,64,0,1,float16,float16,0,0.015930666277805965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,64,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,64,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,64,128,1,float16,float16,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,64,128,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,64,0,1,float16,fp8,0,0.01764800027012825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,64,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,64,128,1,float16,float16,0,0.014917333920796713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,64,0,1,fp8,fp8,0,0.016735999534527462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,64,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,64,128,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,64,0,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,64,128,1,float16,float16,0,0.014991999914248785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,64,128,1,float16,float16,0,0.014933332800865173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,64,0,1,float16,float16,0,0.016778666526079178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,64,128,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,64,128,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,64,0,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,64,0,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,64,128,1,float16,float16,0,0.015376000354687372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,64,128,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,64,128,1,float16,float16,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,64,128,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,64,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,64,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,64,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,64,128,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,64,128,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,64,0,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,64,128,1,fp8,fp8,0,0.016271999726692837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,64,0,1,float16,fp8,0,0.01545599972208341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,64,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,64,128,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,64,128,1,float16,float16,0,0.014922666052977243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,64,0,1,float16,float16,0,0.016469333320856094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,64,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,64,128,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,64,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,64,0,1,fp8,fp8,0,0.016197333733240765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,64,128,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,64,0,1,float16,fp8,0,0.016693333784739178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,64,0,1,fp8,fp8,0,0.016575999557971954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,64,128,1,float16,float16,0,0.014981333166360855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,64,0,1,float16,float16,0,0.015664000064134598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,64,128,1,float16,fp8,0,0.015439999600251516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,64,128,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,64,0,1,float16,fp8,0,0.015552000453074774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,64,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,128,1,float16,float16,0,0.45190401871999103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,128,1,float16,fp8,0,0.4578239917755127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,128,1,fp8,fp8,0,0.4265013138453166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,0,1,float16,float16,0,2.9843358993530273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,128,1,float16,float16,0,0.46625598271687824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,0,1,float16,fp8,0,2.8357280095418296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,128,1,float16,fp8,0,0.47115198771158856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,0,1,fp8,fp8,0,2.547935962677002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,128,1,fp8,fp8,0,0.44230933984120685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,128,1,float16,float16,0,0.4815359910329183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,0,1,float16,float16,0,2.88265069325765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,128,1,float16,fp8,0,0.48607468605041504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,0,1,float16,fp8,0,2.757450739542643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,0,1,fp8,fp8,0,2.5647360483805337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,128,1,fp8,fp8,0,0.4618026812871297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,0,1,float16,float16,0,2.7684427897135415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,128,1,float16,float16,0,0.2739786704381307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,0,1,float16,fp8,0,2.772864023844401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,0,1,fp8,fp8,0,2.5843520164489746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,128,1,float16,fp8,0,0.2812426686286926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,0,1,float16,float16,0,1.4713600476582844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,128,1,fp8,fp8,0,0.26774932940800983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,128,1,float16,float16,0,0.24463999271392822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,0,1,float16,fp8,0,1.4771359761555989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,0,1,fp8,fp8,0,1.3762613932291667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,128,1,float16,fp8,0,0.246453324953715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,128,1,fp8,fp8,0,0.2330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,0,1,float16,float16,0,1.4387733141581218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,128,1,float16,float16,0,0.25094399849573773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,0,1,float16,fp8,0,1.4380106925964355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,0,1,fp8,fp8,0,1.341493288675944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,128,1,float16,fp8,0,0.252890666325887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,128,1,fp8,fp8,0,0.24064532915751138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,0,1,float16,float16,0,1.4412105878194172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,128,1,float16,float16,0,0.2584373354911804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,0,1,fp8,fp8,0,1.3478719393412273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,0,1,float16,fp8,0,1.4467840194702148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,128,1,float16,fp8,0,0.2616159915924072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,128,1,fp8,fp8,0,0.24872533480326334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,128,1,float16,float16,0,0.15662399927775064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,0,1,float16,float16,0,1.4511094093322754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,0,1,float16,fp8,0,1.458357334136963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,0,1,fp8,fp8,0,1.356378714243571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,128,1,float16,fp8,0,0.16062399744987488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,0,1,float16,float16,0,0.8120853106180826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,128,1,fp8,fp8,0,0.15610667069753012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,128,1,float16,float16,0,0.13800533612569174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,0,1,fp8,fp8,0,0.7587786515553793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,0,1,float16,fp8,0,0.8125973542531332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,128,1,float16,fp8,0,0.14034666617711386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,0,1,float16,float16,0,0.7926026980082194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,128,1,fp8,fp8,0,0.13411200046539307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,128,1,float16,float16,0,0.14035200079282126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,0,1,float16,fp8,0,0.7903306484222412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,0,1,fp8,fp8,0,0.7383733590443929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,128,1,float16,fp8,0,0.142085333665212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,0,1,float16,float16,0,0.7921333312988281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,128,1,fp8,fp8,0,0.13857600092887878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,128,1,float16,float16,0,0.14679466684659323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,0,1,float16,fp8,0,0.7957013448079427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,0,1,fp8,fp8,0,0.7428906758626302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,128,1,float16,fp8,0,0.14851199587186178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,0,1,float16,float16,0,0.798362652460734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,128,1,fp8,fp8,0,0.14448533455530801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,128,1,float16,float16,0,0.11548800269762675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,0,1,float16,fp8,0,0.8015733559926351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,128,1,float16,fp8,0,0.11552000045776367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,0,1,fp8,fp8,0,0.7483146985371908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,0,1,float16,float16,0,0.4949333270390828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,128,1,fp8,fp8,0,0.11123200257619222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,128,1,float16,float16,0,0.1135040024916331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,0,1,float16,fp8,0,0.49352534612019855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,0,1,fp8,fp8,0,0.45870399475097656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,128,1,float16,fp8,0,0.11356799801190694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,0,1,float16,float16,0,0.4949973424275716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,128,1,fp8,fp8,0,0.10931733250617981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,0,1,float16,fp8,0,0.49221332867940265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,128,1,float16,float16,0,0.11384532848993938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,0,1,fp8,fp8,0,0.4588266611099243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,0,1,float16,float16,0,0.4915733337402344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,128,1,float16,fp8,0,0.11387200156847636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,128,1,fp8,fp8,0,0.10956266522407532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,128,1,float16,float16,0,0.11370133360226949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,0,1,float16,fp8,0,0.492789347966512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,0,1,fp8,fp8,0,0.45766401290893555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,0,1,float16,float16,0,0.49453334013621014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,128,1,float16,fp8,0,0.11340799927711487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,128,1,fp8,fp8,0,0.10912533601125081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,0,1,float16,fp8,0,0.49356265862782794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,128,1,float16,float16,0,0.34351468086242676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,0,1,fp8,fp8,0,0.45764267444610596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,128,1,float16,fp8,0,0.34815998872121173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,128,1,fp8,fp8,0,0.32522666454315186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,0,1,float16,float16,0,1.6401386260986328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,128,1,float16,float16,0,0.3534613450368245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,0,1,float16,fp8,0,1.644576072692871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,128,1,float16,fp8,0,0.35741865634918213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,0,1,fp8,fp8,0,1.525386651357015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,128,1,fp8,fp8,0,0.3409973382949829
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,0,1,float16,float16,0,1.7141226132710774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,128,1,float16,float16,0,0.37510931491851807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,0,1,float16,fp8,0,1.6495200792948406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,128,1,float16,fp8,0,0.3690720001856486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,0,1,fp8,fp8,0,1.535813331604004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,128,1,fp8,fp8,0,0.3508693377176921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,0,1,float16,float16,0,1.6592159271240234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,128,1,float16,float16,0,0.2118826707204183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,128,1,float16,fp8,0,0.21610132853190103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,0,1,float16,fp8,0,1.6646186510721843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,0,1,fp8,fp8,0,1.5508640607198079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,128,1,fp8,fp8,0,0.20668800671895346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,0,1,float16,float16,0,0.9016373157501221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,128,1,float16,float16,0,0.18636800845464072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,0,1,float16,fp8,0,0.9053440093994141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,0,1,fp8,fp8,0,0.8431733449300131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,128,1,float16,fp8,0,0.1891040007273356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,128,1,fp8,fp8,0,0.18091734250386557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,0,1,float16,float16,0,0.8716533184051514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,128,1,float16,float16,0,0.1914506753285726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,0,1,fp8,fp8,0,0.8164107004801432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,0,1,float16,fp8,0,0.8773919741312662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,128,1,float16,fp8,0,0.1917919913927714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,128,1,fp8,fp8,0,0.1854026714960734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,0,1,float16,float16,0,0.8757332960764567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,128,1,float16,float16,0,0.1980746587117513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,0,1,fp8,fp8,0,0.8212052981058756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,0,1,float16,fp8,0,0.8776693344116211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,128,1,float16,fp8,0,0.20148267348607382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,128,1,fp8,fp8,0,0.1921280026435852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,0,1,float16,float16,0,0.8835946718851725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,128,1,float16,float16,0,0.12380266189575195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,0,1,fp8,fp8,0,0.8288053671518961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,0,1,float16,fp8,0,0.8873973687489828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,0,1,float16,float16,0,0.509882648785909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,128,1,float16,fp8,0,0.12522133191426596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,128,1,fp8,fp8,0,0.12214932839075725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,128,1,float16,float16,0,0.10922132929166158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,0,1,float16,fp8,0,0.5114239851633707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,0,1,fp8,fp8,0,0.4764426549275716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,128,1,float16,fp8,0,0.1118986705938975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,0,1,float16,float16,0,0.4927626848220825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,128,1,fp8,fp8,0,0.10364266236623128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,128,1,float16,float16,0,0.11154133081436157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,0,1,fp8,fp8,0,0.4590880076090495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,0,1,float16,fp8,0,0.4968106746673584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,0,1,float16,float16,0,0.49436267217000324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,128,1,float16,fp8,0,0.11336533228556316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,128,1,fp8,fp8,0,0.10635200142860413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,128,1,float16,float16,0,0.11392533779144287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,0,1,float16,fp8,0,0.4971146583557129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,0,1,fp8,fp8,0,0.4605493148167928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,128,1,float16,fp8,0,0.1174720029036204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,0,1,float16,float16,0,0.4988693396250407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,128,1,fp8,fp8,0,0.11380799611409505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,128,1,float16,float16,0,0.0899679958820343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,0,1,float16,fp8,0,0.4986079931259155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,0,1,fp8,fp8,0,0.4678933223088582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,0,1,float16,float16,0,0.32128000259399414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,128,1,float16,fp8,0,0.08918933073679607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,128,1,fp8,fp8,0,0.08469866712888081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,0,1,float16,fp8,0,0.32070932785669964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,128,1,float16,float16,0,0.08881066242853801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,0,1,fp8,fp8,0,0.30073599020640057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,0,1,float16,float16,0,0.32052799065907794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,128,1,float16,fp8,0,0.08900800347328186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,128,1,fp8,fp8,0,0.08515200018882751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,0,1,float16,fp8,0,0.3219839930534363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,128,1,float16,float16,0,0.08867200215657552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,0,1,fp8,fp8,0,0.29783467451731366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,0,1,float16,float16,0,0.32178133726119995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,128,1,float16,fp8,0,0.08896000186602275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,128,1,fp8,fp8,0,0.0849226713180542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,0,1,float16,fp8,0,0.32064000765482586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,128,1,float16,float16,0,0.08890666564305623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,0,1,fp8,fp8,0,0.2980960011482239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,0,1,float16,float16,0,0.31884799400965375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,128,1,float16,fp8,0,0.08894399801890056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,128,1,fp8,fp8,0,0.08473066488901775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,0,1,float16,fp8,0,0.3207040031750997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,0,1,fp8,fp8,0,0.2978559931119283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,128,1,float16,float16,0,0.28545065720876056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,128,1,float16,fp8,0,0.28894933064778644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,128,1,fp8,fp8,0,0.2717866698900859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,0,1,float16,float16,0,1.191269318262736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,128,1,float16,float16,0,0.2936426599820455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,0,1,float16,fp8,0,1.193130652109782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,0,1,fp8,fp8,0,1.1115840276082356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,128,1,float16,fp8,0,0.29571733872095746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,128,1,fp8,fp8,0,0.2789600094159444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,0,1,float16,float16,0,1.1987306276957195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,128,1,float16,float16,0,0.3019413352012634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,0,1,fp8,fp8,0,1.1180906295776367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,0,1,float16,fp8,0,1.2029600143432617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,128,1,float16,fp8,0,0.3073546687761943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,128,1,fp8,fp8,0,0.2911360065142314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,0,1,float16,float16,0,1.2103253205617268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,128,1,float16,float16,0,0.17866667111714682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,0,1,fp8,fp8,0,1.1303733189900715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,0,1,float16,fp8,0,1.2166186968485515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,128,1,float16,fp8,0,0.18238933881123862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,0,1,float16,float16,0,0.6657919883728027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,128,1,fp8,fp8,0,0.17506666978200278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,128,1,float16,float16,0,0.15454933047294617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,0,1,fp8,fp8,0,0.625055988629659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,0,1,float16,fp8,0,0.6706933180491129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,128,1,float16,fp8,0,0.15661866466204324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,128,1,fp8,fp8,0,0.15198399623235068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,0,1,float16,float16,0,0.6406240065892538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,128,1,float16,float16,0,0.15901866555213928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,0,1,float16,fp8,0,0.6456533273061117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,0,1,fp8,fp8,0,0.6011573473612467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,128,1,float16,fp8,0,0.16083199779192606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,0,1,float16,float16,0,0.6444053252538046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,128,1,fp8,fp8,0,0.155157337586085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,128,1,float16,float16,0,0.16725865999857584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,0,1,float16,fp8,0,0.6489546696345011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,0,1,fp8,fp8,0,0.6033706665039062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,128,1,float16,fp8,0,0.16906134287516275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,128,1,fp8,fp8,0,0.1628320018450419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,0,1,float16,float16,0,0.6543146769205729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,128,1,float16,float16,0,0.1036959985891978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,0,1,float16,fp8,0,0.6575466791788737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,0,1,fp8,fp8,0,0.6119306484858195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,128,1,float16,fp8,0,0.10745599865913391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,0,1,float16,float16,0,0.38277868429819745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,128,1,fp8,fp8,0,0.1053493320941925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,0,1,float16,fp8,0,0.38591468334198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,128,1,float16,float16,0,0.09522133072217305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,0,1,fp8,fp8,0,0.3599626620610555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,128,1,float16,fp8,0,0.09528000156084697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,128,1,fp8,fp8,0,0.08919999996821086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,0,1,float16,float16,0,0.37247467041015625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,128,1,float16,float16,0,0.09523199995358785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,0,1,float16,fp8,0,0.3714666763941447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,0,1,fp8,fp8,0,0.34492266178131104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,128,1,float16,fp8,0,0.09714667002360027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,128,1,fp8,fp8,0,0.09110400080680847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,0,1,float16,float16,0,0.37251198291778564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,128,1,float16,float16,0,0.0981173316637675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,0,1,fp8,fp8,0,0.3450719912846883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,0,1,float16,fp8,0,0.3731093406677246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,128,1,float16,fp8,0,0.09923733274141948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,128,1,fp8,fp8,0,0.09647466739018758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,0,1,float16,float16,0,0.3767840067545573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,128,1,float16,float16,0,0.07861333092053731
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,0,1,fp8,fp8,0,0.34993600845336914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,0,1,float16,fp8,0,0.3757013479868571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,128,1,float16,fp8,0,0.07870399951934814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,0,1,float16,float16,0,0.25049599011739093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,128,1,fp8,fp8,0,0.07521066566308339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,128,1,float16,float16,0,0.07853866616884868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,0,1,fp8,fp8,0,0.23426133394241333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,0,1,float16,fp8,0,0.25094934304555255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,128,1,fp8,fp8,0,0.07525866727034251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,128,1,float16,fp8,0,0.0788213312625885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,0,1,float16,float16,0,0.252074658870697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,128,1,float16,float16,0,0.07875733574231465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,0,1,fp8,fp8,0,0.23455466826756796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,0,1,float16,fp8,0,0.25177067518234253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,128,1,float16,fp8,0,0.0794239987929662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,128,1,fp8,fp8,0,0.07653866708278656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,0,1,float16,float16,0,0.25093867381413776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,0,1,float16,fp8,0,0.25301865736643475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,0,1,fp8,fp8,0,0.2344640096028646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,128,1,float16,float16,0,0.07875200112660725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,128,1,float16,fp8,0,0.07853866616884868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,0,1,float16,float16,0,0.25221866369247437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,128,1,fp8,fp8,0,0.07472000022729237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,0,1,float16,fp8,0,0.2521760066350301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,0,1,fp8,fp8,0,0.23291732867558798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,128,1,float16,float16,0,0.442848006884257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,128,1,float16,fp8,0,0.4471733172734578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,128,1,fp8,fp8,0,0.4164533217748006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,0,1,float16,float16,0,1.5554933547973633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,128,1,float16,float16,0,0.4569173256556193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,0,1,float16,fp8,0,1.5599253972371419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,128,1,float16,fp8,0,0.4611626863479614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,0,1,fp8,fp8,0,1.4480373064676921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,128,1,fp8,fp8,0,0.4325386683146159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,0,1,float16,float16,0,1.569167931874593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,128,1,float16,float16,0,0.4711680014928182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,0,1,fp8,fp8,0,1.4637920061747234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,0,1,float16,fp8,0,1.5752959251403809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,128,1,float16,fp8,0,0.47625601291656494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,128,1,fp8,fp8,0,0.45152533054351807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,0,1,float16,float16,0,1.5873279571533203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,128,1,float16,float16,0,0.26336000363032025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,128,1,float16,fp8,0,0.26870934168497723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,0,1,float16,fp8,0,1.5925440788269043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,0,1,float16,float16,0,0.8480319976806641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,0,1,fp8,fp8,0,1.4825280507405598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,128,1,fp8,fp8,0,0.25517867008845013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,128,1,float16,float16,0,0.2325813372929891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,0,1,float16,fp8,0,0.8547466595967611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,0,1,fp8,fp8,0,0.794922669728597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,128,1,float16,fp8,0,0.23437867561976114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,0,1,float16,float16,0,0.8155786991119385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,128,1,fp8,fp8,0,0.2221440076828003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,128,1,float16,float16,0,0.2383306622505188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,0,1,float16,fp8,0,0.8159306844075521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,0,1,fp8,fp8,0,0.7601280212402344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,128,1,float16,fp8,0,0.2410773237546285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,0,1,float16,float16,0,0.8204267024993896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,128,1,fp8,fp8,0,0.2281493345896403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,128,1,float16,float16,0,0.24714134136835733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,0,1,float16,fp8,0,0.8207253615061442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,0,1,fp8,fp8,0,0.7688319683074951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,128,1,float16,fp8,0,0.2513440052668254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,0,1,float16,float16,0,0.8287573655446371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,128,1,fp8,fp8,0,0.2364906668663025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,128,1,float16,float16,0,0.14346667130788168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,0,1,float16,fp8,0,0.8350666364034017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,0,1,fp8,fp8,0,0.7774399916330973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,128,1,float16,fp8,0,0.14807466665903726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,0,1,float16,float16,0,0.46555201212565106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,128,1,fp8,fp8,0,0.14215999841690063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,0,1,float16,fp8,0,0.470085342725118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,0,1,fp8,fp8,0,0.4373813470204671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,128,1,float16,float16,0,0.12339199582735698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,128,1,float16,fp8,0,0.12592533230781555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,128,1,fp8,fp8,0,0.11974400281906128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,0,1,float16,float16,0,0.4444640080134074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,128,1,float16,float16,0,0.1279253363609314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,0,1,float16,fp8,0,0.4456640084584554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,0,1,fp8,fp8,0,0.41598931948343915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,128,1,fp8,fp8,0,0.12574399511019388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,128,1,float16,fp8,0,0.1280586620171865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,0,1,float16,float16,0,0.446234663327535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,128,1,float16,float16,0,0.13194666306177774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,0,1,fp8,fp8,0,0.41974933942159015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,0,1,float16,fp8,0,0.4503093163172404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,128,1,float16,fp8,0,0.1344586710135142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,128,1,fp8,fp8,0,0.13193066914876303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,0,1,float16,float16,0,0.45421866575876874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,128,1,float16,float16,0,0.08488532900810242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,0,1,fp8,fp8,0,0.42873068650563556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,0,1,float16,fp8,0,0.4540213346481323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,128,1,float16,fp8,0,0.08690133690834045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,128,1,fp8,fp8,0,0.08706133564313252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,0,1,float16,float16,0,0.27375467618306476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,128,1,float16,float16,0,0.07870399951934814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,0,1,fp8,fp8,0,0.25909332434336346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,0,1,float16,fp8,0,0.2769013245900472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,128,1,fp8,fp8,0,0.07444266478220622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,128,1,float16,fp8,0,0.07856533428033192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,0,1,float16,float16,0,0.2656906644503276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,128,1,float16,float16,0,0.07874133189519246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,0,1,fp8,fp8,0,0.2469066580136617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,0,1,float16,fp8,0,0.2677119970321655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,128,1,float16,fp8,0,0.0804746647675832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,0,1,float16,float16,0,0.26729599634806317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,128,1,fp8,fp8,0,0.07469866673151652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,128,1,float16,float16,0,0.08083199958006541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,0,1,float16,fp8,0,0.2667413353919983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,0,1,fp8,fp8,0,0.24715199073155722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,128,1,float16,fp8,0,0.08243733147780101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,128,1,fp8,fp8,0,0.07884799937407176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,0,1,float16,float16,0,0.2700693408648173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,128,1,float16,float16,0,0.06224533418814341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,0,1,fp8,fp8,0,0.24989332755406699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,0,1,float16,fp8,0,0.2693600058555603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,0,1,float16,float16,0,0.18619734048843384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,128,1,fp8,fp8,0,0.06044800082842509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,128,1,float16,fp8,0,0.06384533147017162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,0,1,float16,fp8,0,0.18551466862360635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,128,1,float16,float16,0,0.06241600215435028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,0,1,fp8,fp8,0,0.17355199654897055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,128,1,float16,fp8,0,0.062421331803003945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,0,1,float16,float16,0,0.18686399857203165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,128,1,fp8,fp8,0,0.060533334811528526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,0,1,float16,fp8,0,0.18626666069030762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,0,1,fp8,fp8,0,0.17345066865285239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,128,1,float16,float16,0,0.064560001095136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,0,1,float16,float16,0,0.18523200352986655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,128,1,float16,fp8,0,0.06422933439413707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,128,1,fp8,fp8,0,0.06188266475995382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,0,1,float16,fp8,0,0.18769067525863647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,128,1,float16,float16,0,0.06422399977842967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,0,1,fp8,fp8,0,0.17300800482432047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,0,1,float16,float16,0,0.1854026714960734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,128,1,float16,fp8,0,0.06443200012048085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,128,1,fp8,fp8,0,0.062309334675470986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,0,1,float16,fp8,0,0.18529599905014038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,0,1,fp8,fp8,0,0.17303999265034994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,128,1,float16,float16,0,0.3381226857503255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,128,1,float16,fp8,0,0.3397546609242757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,128,1,fp8,fp8,0,0.31646933158238727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,0,1,float16,float16,0,0.9616959889729818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,128,1,float16,float16,0,0.3468053340911865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,0,1,float16,fp8,0,0.963152011235555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,0,1,fp8,fp8,0,0.890069325764974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,128,1,float16,fp8,0,0.35040001074473065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,0,1,float16,float16,0,0.9695999622344971
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,128,1,fp8,fp8,0,0.32897067070007324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,128,1,float16,float16,0,0.3587840000788371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,0,1,float16,fp8,0,0.9726133346557617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,0,1,fp8,fp8,0,0.9028639793395996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,0,1,float16,float16,0,0.9838293393452963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,128,1,float16,fp8,0,0.36281601587931317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,128,1,fp8,fp8,0,0.34300800164540607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,128,1,float16,float16,0,0.20360000928243002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,0,1,float16,fp8,0,0.9843200047810873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,0,1,float16,float16,0,0.5356213251749674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,128,1,float16,fp8,0,0.2092906634012858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,128,1,fp8,fp8,0,0.19818133115768433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,0,1,fp8,fp8,0,0.9171679814656576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,128,1,float16,float16,0,0.17721066872278848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,0,1,float16,fp8,0,0.5394933223724365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,0,1,fp8,fp8,0,0.5033813317616781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,0,1,float16,float16,0,0.5089066823323568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,128,1,float16,fp8,0,0.17923200130462646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,128,1,fp8,fp8,0,0.1709973414738973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,0,1,float16,fp8,0,0.5094346602757772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,128,1,float16,float16,0,0.18266665935516357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,0,1,fp8,fp8,0,0.4761066834131877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,128,1,float16,fp8,0,0.18480533361434937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,128,1,fp8,fp8,0,0.176362673441569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,0,1,float16,float16,0,0.511733333269755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,0,1,float16,fp8,0,0.5131253401438395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,128,1,float16,float16,0,0.19150932629903158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,0,1,fp8,fp8,0,0.4803573290506999
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,128,1,float16,fp8,0,0.1932906707127889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,128,1,fp8,fp8,0,0.18409067392349243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,0,1,float16,float16,0,0.5192319949467977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,128,1,float16,float16,0,0.11369599898656209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,0,1,float16,fp8,0,0.5238879919052124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,0,1,fp8,fp8,0,0.48770666122436523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,128,1,float16,fp8,0,0.1169653336207072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,0,1,float16,float16,0,0.29928000768025714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,128,1,fp8,fp8,0,0.1137600044409434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,128,1,float16,float16,0,0.09948800007502238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,0,1,float16,fp8,0,0.3036213318506877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,0,1,fp8,fp8,0,0.28355733553568524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,128,1,float16,fp8,0,0.10069333513577779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,128,1,fp8,fp8,0,0.09500799576441447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,0,1,float16,float16,0,0.28519999980926514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,128,1,float16,float16,0,0.10123200217882793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,0,1,float16,fp8,0,0.28649600346883136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,0,1,fp8,fp8,0,0.26332799593607586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,128,1,float16,fp8,0,0.1018346647421519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,128,1,fp8,fp8,0,0.09800533453623454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,0,1,float16,float16,0,0.2882293264071147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,128,1,float16,float16,0,0.10515200098355611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,0,1,float16,fp8,0,0.28861333926518756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,0,1,fp8,fp8,0,0.26710933446884155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,128,1,float16,fp8,0,0.10730133454004924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,128,1,fp8,fp8,0,0.10339200496673584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,0,1,float16,float16,0,0.2905706763267517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,128,1,float16,float16,0,0.0664213349421819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,0,1,float16,fp8,0,0.2930826743443807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,0,1,fp8,fp8,0,0.27373333772023517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,128,1,float16,fp8,0,0.06840000053246816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,0,1,float16,float16,0,0.17957866191864014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,128,1,fp8,fp8,0,0.06630399823188782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,128,1,float16,float16,0,0.062080000837643944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,0,1,float16,fp8,0,0.18121600151062012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,0,1,fp8,fp8,0,0.1693333387374878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,128,1,float16,fp8,0,0.06206933160622915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,128,1,fp8,fp8,0,0.05866133173306783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,0,1,float16,float16,0,0.1750133236249288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,128,1,float16,float16,0,0.06284800171852112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,0,1,float16,fp8,0,0.17591466506322226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,0,1,fp8,fp8,0,0.16262400150299072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,128,1,float16,fp8,0,0.06257600088914235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,128,1,fp8,fp8,0,0.06002666552861532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,0,1,float16,float16,0,0.17569067080815634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,0,1,float16,fp8,0,0.17722666263580322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,128,1,float16,float16,0,0.06282666822274525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,0,1,fp8,fp8,0,0.16459733247756958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,128,1,float16,fp8,0,0.06533866624037425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,0,1,float16,float16,0,0.1760853330294291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,128,1,fp8,fp8,0,0.06214400132497152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,0,1,float16,fp8,0,0.17726399501164755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,0,1,fp8,fp8,0,0.1656053364276886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,128,1,float16,float16,0,0.05421866476535797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,0,1,float16,float16,0,0.12469333410263062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,128,1,float16,fp8,0,0.05429333448410034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,128,1,fp8,fp8,0,0.05208000044027964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,0,1,float16,fp8,0,0.12642133235931396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,0,1,fp8,fp8,0,0.11745066444079082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,128,1,float16,float16,0,0.05426133175690969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,0,1,float16,float16,0,0.12569066882133484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,128,1,float16,fp8,0,0.054144000013669334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,128,1,fp8,fp8,0,0.052069331208864846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,0,1,float16,fp8,0,0.12575466434160867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,0,1,fp8,fp8,0,0.11758933464686076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,128,1,float16,float16,0,0.053946668903032936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,0,1,float16,float16,0,0.12585066755612692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,128,1,float16,fp8,0,0.054197331269582115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,128,1,fp8,fp8,0,0.051856001218159996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,0,1,float16,fp8,0,0.12575466434160867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,0,1,fp8,fp8,0,0.11759466926256816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,128,1,float16,float16,0,0.054234668612480164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,0,1,float16,float16,0,0.12599999705950418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,128,1,float16,fp8,0,0.054048001766204834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,128,1,fp8,fp8,0,0.052069331208864846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,0,1,float16,fp8,0,0.12611732880274454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,0,1,fp8,fp8,0,0.11761599779129028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,128,1,float16,float16,0,0.4418933391571045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,128,1,float16,fp8,0,0.4455999930699666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,128,1,fp8,fp8,0,0.4148586591084798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,0,1,float16,float16,0,0.9635199705759684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,128,1,float16,float16,0,0.45604801177978516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,0,1,float16,fp8,0,0.9673386414845785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,0,1,fp8,fp8,0,0.8928106625874838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,128,1,float16,fp8,0,0.46015465259552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,0,1,float16,float16,0,0.9755787054697672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,128,1,fp8,fp8,0,0.4306559960047404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,128,1,float16,float16,0,0.4723413387934367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,0,1,float16,fp8,0,0.977679967880249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,0,1,fp8,fp8,0,0.9065919717152914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,0,1,float16,float16,0,0.9910666942596436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,128,1,float16,fp8,0,0.4766933520634969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,128,1,fp8,fp8,0,0.4481920003890991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,128,1,float16,float16,0,0.25998934110005695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,0,1,float16,fp8,0,0.9975626468658447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,0,1,fp8,fp8,0,0.9257973035176595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,0,1,float16,float16,0,0.532421350479126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,128,1,float16,fp8,0,0.2646613319714864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,128,1,fp8,fp8,0,0.25178666909535724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,0,1,float16,fp8,0,0.5372320016225179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,0,1,fp8,fp8,0,0.5011626482009888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,128,1,float16,float16,0,0.22734934091567993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,128,1,float16,fp8,0,0.2309653361638387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,128,1,fp8,fp8,0,0.2165600061416626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,0,1,float16,float16,0,0.49941333134969074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,128,1,float16,float16,0,0.23233066002527872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,0,1,float16,fp8,0,0.503109335899353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,0,1,fp8,fp8,0,0.46800533930460614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,128,1,fp8,fp8,0,0.22530666987101236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,128,1,float16,fp8,0,0.2362933357556661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,0,1,float16,float16,0,0.5033973455429077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,128,1,float16,float16,0,0.24211732546488443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,0,1,fp8,fp8,0,0.47414934635162354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,0,1,float16,fp8,0,0.5090719858805338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,128,1,float16,fp8,0,0.24641066789627075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,128,1,fp8,fp8,0,0.23313599824905396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,0,1,float16,float16,0,0.5137813488642374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,128,1,float16,float16,0,0.13876799742380777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,0,1,fp8,fp8,0,0.4821386734644572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,0,1,float16,fp8,0,0.5178293387095133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,0,1,float16,float16,0,0.29081066449483234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,128,1,float16,fp8,0,0.14110400279362997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,128,1,fp8,fp8,0,0.1383946637312571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,128,1,float16,float16,0,0.11806399623552959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,0,1,float16,fp8,0,0.2927466630935669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,0,1,fp8,fp8,0,0.275434672832489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,128,1,float16,fp8,0,0.12084266543388367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,128,1,fp8,fp8,0,0.1153706709543864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,0,1,float16,float16,0,0.27024000883102417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,128,1,float16,float16,0,0.1200266679128011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,0,1,float16,fp8,0,0.2695680061976115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,0,1,float16,float16,0,0.2716533342997233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,0,1,fp8,fp8,0,0.251418670018514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,128,1,float16,fp8,0,0.12346133589744568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,128,1,fp8,fp8,0,0.11938666303952535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,0,1,float16,fp8,0,0.2746613423029582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,128,1,float16,float16,0,0.1265066663424174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,0,1,fp8,fp8,0,0.2573653260866801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,0,1,float16,float16,0,0.2796906630198161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,128,1,float16,fp8,0,0.13009066383043924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,128,1,fp8,fp8,0,0.12617599964141846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,0,1,float16,fp8,0,0.2807893355687459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,128,1,float16,float16,0,0.07863999903202057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,0,1,float16,float16,0,0.165994664033254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,0,1,fp8,fp8,0,0.26479466756184894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,128,1,float16,fp8,0,0.08056533336639404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,128,1,fp8,fp8,0,0.08075733482837677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,0,1,float16,fp8,0,0.16827734311421713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,0,1,fp8,fp8,0,0.15937599539756775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,128,1,float16,float16,0,0.07107200225194295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,0,1,float16,float16,0,0.16074666380882263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,128,1,float16,fp8,0,0.07253866891066234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,128,1,fp8,fp8,0,0.06816533207893372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,0,1,float16,fp8,0,0.16010666886965433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,128,1,float16,float16,0,0.07124266525109609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,0,1,fp8,fp8,0,0.14882666865984598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,0,1,float16,float16,0,0.1590933303038279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,128,1,float16,fp8,0,0.0728053351243337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,128,1,fp8,fp8,0,0.0684746652841568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,0,1,float16,fp8,0,0.16124799847602844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,0,1,fp8,fp8,0,0.14846400419871011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,128,1,float16,float16,0,0.07318933308124542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,0,1,float16,float16,0,0.16129600008328757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,128,1,float16,fp8,0,0.07638399799664815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,128,1,fp8,fp8,0,0.07234133283297221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,128,1,float16,float16,0,0.04976533353328705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,0,1,float16,fp8,0,0.16310399770736694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,0,1,fp8,fp8,0,0.15211733182271323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,0,1,float16,float16,0,0.10757866501808167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,128,1,float16,fp8,0,0.050101334849993386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,128,1,fp8,fp8,0,0.048341333866119385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,0,1,float16,fp8,0,0.10982400178909302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,0,1,fp8,fp8,0,0.10258133212725322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,128,1,float16,float16,0,0.0459146648645401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,128,1,float16,fp8,0,0.04782933493455251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,128,1,fp8,fp8,0,0.0458186666170756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,0,1,float16,float16,0,0.10406933228174846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,0,1,float16,fp8,0,0.1039466659228007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,128,1,float16,float16,0,0.04626133541266123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,0,1,fp8,fp8,0,0.09896000226338704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,0,1,float16,float16,0,0.10558933019638062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,128,1,float16,fp8,0,0.046021332343419395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,128,1,fp8,fp8,0,0.04394133388996124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,0,1,float16,fp8,0,0.10571733117103577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,0,1,fp8,fp8,0,0.09939733147621155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,128,1,float16,float16,0,0.0462666650613149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,0,1,float16,float16,0,0.10553600390752156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,128,1,float16,fp8,0,0.04771199822425842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,128,1,fp8,fp8,0,0.046069333950678505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,0,1,float16,fp8,0,0.10796800255775452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,128,1,float16,float16,0,0.0393653338154157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,0,1,fp8,fp8,0,0.09914666414260864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,0,1,float16,float16,0,0.07672533392906189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,128,1,float16,fp8,0,0.03965866565704346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,128,1,fp8,fp8,0,0.03772266705830892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,0,1,float16,fp8,0,0.07691200077533722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,0,1,fp8,fp8,0,0.07253333429495494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,128,1,float16,float16,0,0.03955200066169103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,0,1,float16,float16,0,0.0765066643555959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,128,1,float16,fp8,0,0.03951466580231985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,128,1,fp8,fp8,0,0.03729599962631861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,0,1,float16,fp8,0,0.07686399916807811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,0,1,fp8,fp8,0,0.07218666871388753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,128,1,float16,float16,0,0.03757333258787791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,128,1,float16,fp8,0,0.03760000069936117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,0,1,float16,float16,0,0.07679999868075053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,128,1,fp8,fp8,0,0.03774933268626531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,0,1,float16,fp8,0,0.07678399980068207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,0,1,fp8,fp8,0,0.07223999996980031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,128,1,float16,float16,0,0.03932266682386398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,128,1,float16,fp8,0,0.03803733239571253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,0,1,float16,float16,0,0.0767146646976471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,128,1,fp8,fp8,0,0.03806933263937632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,0,1,float16,fp8,0,0.07658666869004567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,0,1,fp8,fp8,0,0.0724480003118515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,128,1,float16,float16,0,0.3431839942932129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,128,1,float16,fp8,0,0.3461120128631592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,0,1,float16,float16,0,0.6232639948527018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,128,1,fp8,fp8,0,0.3202773332595825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,0,1,float16,fp8,0,0.626858671506246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,128,1,float16,float16,0,0.3537333408991496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,0,1,fp8,fp8,0,0.5780426661173502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,128,1,float16,fp8,0,0.3550773461659749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,128,1,fp8,fp8,0,0.3327946662902832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,0,1,float16,float16,0,0.637114683787028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,128,1,float16,float16,0,0.3699680169423421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,0,1,float16,fp8,0,0.6363199949264526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,0,1,fp8,fp8,0,0.5902613401412964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,128,1,float16,fp8,0,0.3698986768722534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,128,1,fp8,fp8,0,0.3465760151545207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,0,1,float16,float16,0,0.6529386838277181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,128,1,float16,float16,0,0.204202671845754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,0,1,fp8,fp8,0,0.6040960152943929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,0,1,float16,fp8,0,0.6490826606750488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,0,1,float16,float16,0,0.35507198174794513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,128,1,float16,fp8,0,0.20588266849517822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,128,1,fp8,fp8,0,0.197434663772583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,0,1,float16,fp8,0,0.3582933346430461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,128,1,float16,float16,0,0.17449599504470825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,0,1,float16,float16,0,0.3242453336715698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,0,1,fp8,fp8,0,0.3354506492614746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,128,1,float16,fp8,0,0.17614932854970297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,128,1,fp8,fp8,0,0.17010666926701865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,0,1,float16,fp8,0,0.3241493304570516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,128,1,float16,float16,0,0.18000000715255737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,0,1,fp8,fp8,0,0.30697067578633624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,0,1,float16,float16,0,0.328490674495697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,128,1,float16,fp8,0,0.18109333515167236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,128,1,fp8,fp8,0,0.17300800482432047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,0,1,float16,fp8,0,0.3340799808502197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,0,1,fp8,fp8,0,0.31060800949732464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,128,1,float16,float16,0,0.19010132551193237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,0,1,float16,float16,0,0.3408159812291463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,128,1,float16,fp8,0,0.19223999977111816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,128,1,fp8,fp8,0,0.1818986733754476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,0,1,float16,fp8,0,0.3409706751505534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,128,1,float16,float16,0,0.10974400242169698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,0,1,fp8,fp8,0,0.3206079999605815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,0,1,float16,float16,0,0.19251734018325806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,128,1,float16,fp8,0,0.11108266313870747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,128,1,fp8,fp8,0,0.10942932963371277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,0,1,float16,fp8,0,0.1982133388519287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,0,1,fp8,fp8,0,0.18596800168355307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,128,1,float16,float16,0,0.0939359962940216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,0,1,float16,float16,0,0.17882666985193887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,128,1,float16,fp8,0,0.09525332848230998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,128,1,fp8,fp8,0,0.08871466914812724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,0,1,float16,fp8,0,0.17854400475819907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,0,1,fp8,fp8,0,0.16671999295552573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,128,1,float16,float16,0,0.09642133116722107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,0,1,float16,float16,0,0.17893334229787192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,128,1,float16,fp8,0,0.0972160001595815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,128,1,fp8,fp8,0,0.09318932890892029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,0,1,float16,fp8,0,0.1824586590131124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,0,1,fp8,fp8,0,0.16759467124938965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,128,1,float16,float16,0,0.10030399759610494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,128,1,float16,fp8,0,0.10229333241780598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,0,1,float16,float16,0,0.18396266301472983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,128,1,fp8,fp8,0,0.09919466574986775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,0,1,float16,fp8,0,0.1856480042139689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,128,1,float16,float16,0,0.06251200040181477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,0,1,fp8,fp8,0,0.1777013341585795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,0,1,float16,float16,0,0.11239999532699585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,128,1,float16,fp8,0,0.06409599880377452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,128,1,fp8,fp8,0,0.060602664947509766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,0,1,float16,fp8,0,0.11365333199501038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,0,1,fp8,fp8,0,0.10769066214561462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,128,1,float16,float16,0,0.058149332801500954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,0,1,float16,float16,0,0.10872000455856323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,128,1,float16,fp8,0,0.058058664202690125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,128,1,fp8,fp8,0,0.05461333195368449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,0,1,float16,fp8,0,0.10788800319035848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,0,1,fp8,fp8,0,0.10199466347694397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,128,1,float16,float16,0,0.058186665177345276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,0,1,float16,float16,0,0.10798399647076924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,128,1,float16,fp8,0,0.05952533086140951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,128,1,fp8,fp8,0,0.05606933434804281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,0,1,float16,fp8,0,0.10990933577219646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,0,1,fp8,fp8,0,0.10171733299891154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,128,1,float16,float16,0,0.060271998246510826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,0,1,float16,float16,0,0.10977600018183391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,128,1,float16,fp8,0,0.060229331254959106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,128,1,fp8,fp8,0,0.058431997895240784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,0,1,float16,fp8,0,0.11145066221555074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,128,1,float16,float16,0,0.04378133515516917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,0,1,fp8,fp8,0,0.10389332969983418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,0,1,float16,float16,0,0.0748533308506012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,128,1,float16,fp8,0,0.04398400088151296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,128,1,fp8,fp8,0,0.041893333196640015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,0,1,float16,fp8,0,0.0748586654663086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,128,1,float16,float16,0,0.04190400242805481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,0,1,fp8,fp8,0,0.07044800122578938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,0,1,float16,float16,0,0.07276266813278198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,128,1,float16,fp8,0,0.04164800047874451
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,128,1,fp8,fp8,0,0.040005333721637726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,128,1,float16,float16,0,0.041690667470296226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,0,1,fp8,fp8,0,0.06936533252398173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,0,1,float16,fp8,0,0.07414400080839793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,128,1,float16,fp8,0,0.04205866654713949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,128,1,fp8,fp8,0,0.0408693328499794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,0,1,float16,float16,0,0.07361599802970886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,128,1,float16,float16,0,0.04194133480389913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,0,1,fp8,fp8,0,0.0692799985408783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,0,1,float16,fp8,0,0.07250666618347168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,128,1,fp8,fp8,0,0.04013866682847341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,128,1,float16,fp8,0,0.04177066683769226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,0,1,float16,float16,0,0.07449600100517273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,128,1,float16,float16,0,0.033589333295822144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,0,1,float16,fp8,0,0.07427733143170674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,0,1,fp8,fp8,0,0.06853333115577698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,128,1,float16,fp8,0,0.03349333256483078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,0,1,float16,float16,0,0.06021333237489065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,128,1,fp8,fp8,0,0.03166399896144867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,0,1,float16,fp8,0,0.0621066689491272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,128,1,float16,float16,0,0.03378133227427801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,0,1,fp8,fp8,0,0.058005332946777344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,128,1,float16,fp8,0,0.033728001018365227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,0,1,float16,float16,0,0.062074666221936546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,128,1,fp8,fp8,0,0.03350399931271871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,128,1,float16,float16,0,0.03400533397992452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,0,1,float16,fp8,0,0.060085331400235496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,0,1,fp8,fp8,0,0.0581226646900177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,128,1,fp8,fp8,0,0.03173333406448364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,128,1,float16,fp8,0,0.0335359995563825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,0,1,float16,float16,0,0.06204266846179962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,0,1,float16,fp8,0,0.06232533355553945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,128,1,float16,float16,0,0.03365333378314972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,0,1,fp8,fp8,0,0.057962665955225624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,0,1,float16,float16,0,0.061946665247281395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,128,1,float16,fp8,0,0.03401600072781245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,128,1,fp8,fp8,0,0.03364266703526179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,0,1,float16,fp8,0,0.06043200194835663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,0,1,fp8,fp8,0,0.058362667759259544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,128,1,float16,float16,0,0.4652906656265259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,128,1,float16,fp8,0,0.46538134415944415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,0,1,float16,float16,0,0.6839679876963297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,128,1,fp8,fp8,0,0.42737066745758057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,0,1,float16,fp8,0,0.6860533555348715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,0,1,fp8,fp8,0,0.6266826788584391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,128,1,float16,float16,0,0.48505600293477374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,128,1,float16,fp8,0,0.4860853354136149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,128,1,fp8,fp8,0,0.43909867604573566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,0,1,float16,float16,0,0.7031573454538981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,0,1,float16,fp8,0,0.7035146554311117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,128,1,float16,float16,0,0.4958133300145467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,0,1,fp8,fp8,0,0.6388746500015259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,128,1,float16,fp8,0,0.4958719809850057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,0,1,float16,float16,0,0.7140426635742188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,128,1,fp8,fp8,0,0.452021320660909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,128,1,float16,float16,0,0.27029865980148315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,0,1,float16,fp8,0,0.7142826716105143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,0,1,fp8,fp8,0,0.6542826493581136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,128,1,float16,fp8,0,0.2699573238690694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,0,1,float16,float16,0,0.38567999998728436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,128,1,fp8,fp8,0,0.2576853235562642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,128,1,float16,float16,0,0.2308853268623352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,0,1,float16,fp8,0,0.38760534922281903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,0,1,fp8,fp8,0,0.3612426519393921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,0,1,float16,float16,0,0.3485333522160848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,128,1,float16,fp8,0,0.23317867517471313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,128,1,fp8,fp8,0,0.21862934033075967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,0,1,float16,fp8,0,0.34618135293324787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,0,1,fp8,fp8,0,0.3257066607475281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,128,1,float16,float16,0,0.23889066775639853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,0,1,float16,float16,0,0.3542666832605998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,128,1,float16,fp8,0,0.23846399784088135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,128,1,fp8,fp8,0,0.2285919984181722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,0,1,float16,fp8,0,0.3540106614430745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,128,1,float16,float16,0,0.24778133630752563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,0,1,fp8,fp8,0,0.33269333839416504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,0,1,float16,float16,0,0.36512001355489093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,128,1,float16,fp8,0,0.2488373319307963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,128,1,fp8,fp8,0,0.23624533414840698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,0,1,float16,fp8,0,0.36615999539693195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,128,1,float16,float16,0,0.14046933253606161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,0,1,fp8,fp8,0,0.3405333360036214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,0,1,float16,float16,0,0.2067520022392273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,128,1,float16,fp8,0,0.1423360009988149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,128,1,fp8,fp8,0,0.13870933651924133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,0,1,float16,fp8,0,0.20817599693934122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,0,1,fp8,fp8,0,0.1959786613782247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,128,1,float16,float16,0,0.11726400256156921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,0,1,float16,float16,0,0.1832746664683024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,128,1,float16,fp8,0,0.11778666575749715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,128,1,fp8,fp8,0,0.11387733618418376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,0,1,float16,fp8,0,0.1832266648610433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,0,1,fp8,fp8,0,0.17072000106175741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,128,1,float16,float16,0,0.12014933427174886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,0,1,float16,float16,0,0.18428800503412882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,128,1,float16,fp8,0,0.12164800365765889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,128,1,fp8,fp8,0,0.11868799726168315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,0,1,float16,fp8,0,0.1864373286565145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,0,1,fp8,fp8,0,0.17704000075658163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,128,1,float16,float16,0,0.1270080010096232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,128,1,float16,fp8,0,0.12967466314633688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,0,1,float16,float16,0,0.19125332434972128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,128,1,fp8,fp8,0,0.12667733430862427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,0,1,float16,fp8,0,0.19363733132680258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,128,1,float16,float16,0,0.07665066421031952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,0,1,fp8,fp8,0,0.1853440006573995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,128,1,float16,fp8,0,0.0767626663049062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,0,1,float16,float16,0,0.1113813320795695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,128,1,fp8,fp8,0,0.07864533364772797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,0,1,float16,fp8,0,0.11266666650772095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,128,1,float16,float16,0,0.06849599877993266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,0,1,fp8,fp8,0,0.11142933368682861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,0,1,float16,float16,0,0.10494933525721233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,128,1,float16,fp8,0,0.07049599786599477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,128,1,fp8,fp8,0,0.06471466521422069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,0,1,float16,fp8,0,0.10614933570226033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,128,1,float16,float16,0,0.0705866664648056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,0,1,fp8,fp8,0,0.09937600294748943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,128,1,float16,fp8,0,0.0724426656961441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,128,1,fp8,fp8,0,0.06654400130112965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,0,1,float16,float16,0,0.10574932893117268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,128,1,float16,float16,0,0.07232533395290375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,0,1,float16,fp8,0,0.10736533006032307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,0,1,fp8,fp8,0,0.1009386678536733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,0,1,float16,float16,0,0.10744532942771912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,128,1,float16,fp8,0,0.07478933533032735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,128,1,fp8,fp8,0,0.07021333277225494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,128,1,float16,float16,0,0.04560000201066335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,0,1,float16,fp8,0,0.10938133796056111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,0,1,fp8,fp8,0,0.10217600067456563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,0,1,float16,float16,0,0.0708426684141159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,128,1,float16,fp8,0,0.04598399996757507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,128,1,fp8,fp8,0,0.04403733213742574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,128,1,float16,float16,0,0.042223999897638954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,0,1,fp8,fp8,0,0.06803733110427856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,0,1,float16,fp8,0,0.07229333122571309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,0,1,float16,float16,0,0.06851199766000111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,128,1,float16,fp8,0,0.04382933179537455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,128,1,fp8,fp8,0,0.041375999649365745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,0,1,float16,fp8,0,0.06971199810504913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,128,1,float16,float16,0,0.04382933179537455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,0,1,fp8,fp8,0,0.06454400221506755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,128,1,float16,fp8,0,0.04385066529115041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,0,1,float16,float16,0,0.0698880006869634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,128,1,fp8,fp8,0,0.0418453315893809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,0,1,float16,fp8,0,0.07018666466077168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,128,1,float16,float16,0,0.043840001026789345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,0,1,fp8,fp8,0,0.06620266536871593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,0,1,float16,float16,0,0.07012266914049785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,128,1,float16,fp8,0,0.04410133262475332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,128,1,fp8,fp8,0,0.04288533329963684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,0,1,float16,fp8,0,0.070592001080513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,128,1,float16,float16,0,0.031658666829268135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,128,1,float16,fp8,0,0.03129599988460541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,0,1,fp8,fp8,0,0.06617600222428639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,0,1,float16,float16,0,0.048170665899912514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,128,1,fp8,fp8,0,0.03161599983771642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,0,1,float16,fp8,0,0.048154667019844055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,0,1,fp8,fp8,0,0.046015997727712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,128,1,float16,float16,0,0.031311998764673867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,0,1,float16,float16,0,0.046015997727712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,128,1,fp8,fp8,0,0.029338667790095013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,128,1,float16,fp8,0,0.03126399964094162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,0,1,float16,fp8,0,0.048021331429481506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,0,1,fp8,fp8,0,0.044256001710891724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,128,1,float16,float16,0,0.02935466667016347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,128,1,float16,fp8,0,0.03169599920511246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,0,1,float16,float16,0,0.047695999344189964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,128,1,fp8,fp8,0,0.02956799914439519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,0,1,float16,fp8,0,0.046949331959088646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,128,1,float16,float16,0,0.029493334392706554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,0,1,fp8,fp8,0,0.04385066529115041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,128,1,float16,fp8,0,0.030154667794704437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,0,1,float16,float16,0,0.047269334395726524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,128,1,fp8,fp8,0,0.029637334247430164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,0,1,float16,fp8,0,0.04761599997679392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,0,1,fp8,fp8,0,0.04563199977080027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,128,1,float16,float16,0,0.029557332396507263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,0,1,float16,float16,0,0.04558399816354116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,128,1,float16,fp8,0,0.029178666571776073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,128,1,fp8,fp8,0,0.029359998802344005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,0,1,float16,fp8,0,0.04577599962552389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,0,1,fp8,fp8,0,0.04378133515516917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,128,1,float16,float16,0,0.029711998999118805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,0,1,float16,float16,0,0.04570133487383524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,128,1,float16,fp8,0,0.02945599953333537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,128,1,fp8,fp8,0,0.029290666182835896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,0,1,float16,fp8,0,0.04568000137805939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,0,1,fp8,fp8,0,0.044165333112080894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,128,1,float16,float16,0,0.029445332785447437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,128,1,float16,fp8,0,0.02959466725587845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,0,1,float16,float16,0,0.045647998650868736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,128,1,fp8,fp8,0,0.027722666660944622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,0,1,float16,fp8,0,0.046154667933781944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,0,1,fp8,fp8,0,0.04404800136884054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,128,1,float16,float16,0,0.029301332930723827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,128,1,float16,fp8,0,0.02940266579389572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,0,1,float16,float16,0,0.045797333121299744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,128,1,fp8,fp8,0,0.02754133443037669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,0,1,float16,fp8,0,0.045647998650868736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,0,1,fp8,fp8,0,0.043621331453323364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,128,1,float16,float16,0,0.34725332260131836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,128,1,float16,fp8,0,0.34601600964864093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,0,1,float16,float16,0,0.45630399386088055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,128,1,fp8,fp8,0,0.3232053319613139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,0,1,float16,fp8,0,0.455946683883667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,0,1,fp8,fp8,0,0.4211413462956746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,128,1,float16,float16,0,0.36136531829833984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,128,1,float16,fp8,0,0.3596106767654419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,0,1,float16,float16,0,0.4700373411178589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,128,1,fp8,fp8,0,0.3346879879633586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,0,1,float16,fp8,0,0.47275201479593915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,0,1,fp8,fp8,0,0.4336106777191162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,128,1,float16,float16,0,0.3798559904098511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,128,1,float16,fp8,0,0.3720746835072835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,0,1,float16,float16,0,0.48312000433603924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,128,1,fp8,fp8,0,0.3473759889602661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,128,1,float16,float16,0,0.2056586742401123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,0,1,float16,fp8,0,0.4809439977010091
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,0,1,fp8,fp8,0,0.4458293517430623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,0,1,float16,float16,0,0.2661813298861186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,128,1,float16,fp8,0,0.208021342754364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,128,1,fp8,fp8,0,0.1981653372446696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,0,1,float16,fp8,0,0.26817067464192706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,0,1,fp8,fp8,0,0.2520693341890971
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,128,1,float16,float16,0,0.17364267508188883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,0,1,float16,float16,0,0.23358933130900064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,128,1,fp8,fp8,0,0.16825066010157266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,128,1,float16,fp8,0,0.17497066656748453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,128,1,float16,float16,0,0.18041600783665976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,0,1,fp8,fp8,0,0.22035199403762817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,0,1,float16,fp8,0,0.2342133323351542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,0,1,float16,float16,0,0.23925334215164185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,128,1,float16,fp8,0,0.18106132745742798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,128,1,fp8,fp8,0,0.17403199275334677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,0,1,fp8,fp8,0,0.22636266549428305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,0,1,float16,fp8,0,0.2413333257039388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,128,1,float16,float16,0,0.1895786722501119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,0,1,float16,float16,0,0.25118400653203327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,128,1,float16,fp8,0,0.19156799713770548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,128,1,fp8,fp8,0,0.18323200941085815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,128,1,float16,float16,0,0.10789866248766582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,0,1,float16,fp8,0,0.25150400400161743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,0,1,fp8,fp8,0,0.23702933390935263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,0,1,float16,float16,0,0.1411946713924408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,128,1,float16,fp8,0,0.10910399754842122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,128,1,fp8,fp8,0,0.10782399773597717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,0,1,float16,fp8,0,0.14457066853841147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,0,1,fp8,fp8,0,0.13804800311724344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,128,1,float16,float16,0,0.09355200330416362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,0,1,float16,float16,0,0.12607466181119284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,128,1,float16,fp8,0,0.09507733583450317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,128,1,fp8,fp8,0,0.08711999654769897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,0,1,float16,fp8,0,0.127920001745224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,0,1,fp8,fp8,0,0.11798933148384094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,128,1,float16,float16,0,0.09433600306510925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,0,1,float16,float16,0,0.12711466352144876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,128,1,float16,fp8,0,0.09727999567985535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,128,1,fp8,fp8,0,0.09206400314966838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,0,1,float16,fp8,0,0.12999467055002847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,0,1,fp8,fp8,0,0.12121066451072693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,128,1,float16,float16,0,0.09816533327102661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,0,1,float16,float16,0,0.13078400492668152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,128,1,float16,fp8,0,0.10234133402506511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,128,1,fp8,fp8,0,0.09895466764767964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,128,1,float16,float16,0,0.06028800209363302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,0,1,float16,fp8,0,0.13462400436401367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,0,1,fp8,fp8,0,0.1288373370965322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,0,1,float16,float16,0,0.08086933195590973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,128,1,float16,fp8,0,0.0621919979651769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,128,1,fp8,fp8,0,0.0602400004863739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,0,1,float16,fp8,0,0.08270933230717976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,0,1,fp8,fp8,0,0.07863466441631317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,128,1,float16,float16,0,0.05597866574923197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,0,1,float16,float16,0,0.07693333427111308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,128,1,float16,fp8,0,0.05606933434804281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,128,1,fp8,fp8,0,0.05218133330345154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,0,1,float16,fp8,0,0.07658133407433827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,128,1,float16,float16,0,0.056287998954455055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,0,1,fp8,fp8,0,0.07293866574764252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,0,1,float16,float16,0,0.07698666552702586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,128,1,float16,fp8,0,0.05766933163007101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,128,1,fp8,fp8,0,0.0540533314148585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,0,1,float16,fp8,0,0.07896533111731212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,0,1,fp8,fp8,0,0.07255466779073079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,128,1,float16,float16,0,0.056559999783833824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,0,1,float16,float16,0,0.07870399951934814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,128,1,float16,fp8,0,0.05818133552869161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,128,1,fp8,fp8,0,0.05641599992911021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,0,1,float16,fp8,0,0.08089066545168559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,128,1,float16,float16,0,0.04173333446184794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,0,1,fp8,fp8,0,0.07456533114115398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,128,1,float16,fp8,0,0.041690667470296226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,0,1,float16,float16,0,0.053946668903032936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,128,1,fp8,fp8,0,0.03963200002908707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,0,1,float16,fp8,0,0.054458667834599815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,0,1,fp8,fp8,0,0.050373335679372154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,128,1,float16,float16,0,0.0395359992980957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,128,1,float16,fp8,0,0.03991466760635376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,0,1,float16,float16,0,0.050554667909940086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,128,1,fp8,fp8,0,0.03743999948104223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,0,1,float16,fp8,0,0.052069331208864846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,128,1,float16,float16,0,0.039434666434923805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,0,1,fp8,fp8,0,0.04795733094215393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,0,1,float16,float16,0,0.051957334081331887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,128,1,float16,fp8,0,0.039813332259655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,128,1,fp8,fp8,0,0.03762666632731756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,0,1,float16,fp8,0,0.052282666166623436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,0,1,fp8,fp8,0,0.049866666396458946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,128,1,float16,float16,0,0.040378667414188385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,0,1,float16,float16,0,0.05175999800364176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,128,1,float16,fp8,0,0.03990400085846583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,128,1,fp8,fp8,0,0.039503999054431915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,128,1,float16,float16,0,0.027402666707833607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,0,1,float16,fp8,0,0.05198933184146881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,0,1,fp8,fp8,0,0.04994666576385498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,128,1,float16,fp8,0,0.02923733244339625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,0,1,float16,float16,0,0.03953066716591517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,128,1,fp8,fp8,0,0.028058665494124096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,0,1,float16,fp8,0,0.040106666584809623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,0,1,fp8,fp8,0,0.03925866633653641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,128,1,float16,float16,0,0.02775999903678894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,0,1,float16,float16,0,0.03818666686614355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,128,1,float16,fp8,0,0.02792533238728841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,128,1,fp8,fp8,0,0.02754133443037669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,0,1,float16,fp8,0,0.03962666789690653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,0,1,fp8,fp8,0,0.03748266647259394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,128,1,float16,float16,0,0.029306667546431225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,128,1,float16,fp8,0,0.029088000456492107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,128,1,fp8,fp8,0,0.02712533374627431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,0,1,float16,float16,0,0.03948266555865606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,0,1,float16,fp8,0,0.03957333415746689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,0,1,fp8,fp8,0,0.037471999724706016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,128,1,float16,float16,0,0.02937600016593933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,128,1,float16,fp8,0,0.02771199991305669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,0,1,float16,float16,0,0.03966933240493139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,128,1,fp8,fp8,0,0.02738133321205775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,0,1,float16,fp8,0,0.03958400090535482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,128,1,float16,float16,0,0.027317332724730175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,0,1,fp8,fp8,0,0.037615999579429626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,0,1,float16,float16,0,0.03836799909671148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,128,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,128,1,fp8,fp8,0,0.02515733242034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,0,1,float16,fp8,0,0.03882666677236557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,128,1,float16,float16,0,0.025301332275072735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,0,1,fp8,fp8,0,0.03576533248027166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,0,1,float16,float16,0,0.037674665451049805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,128,1,float16,fp8,0,0.025578667720158894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,128,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,0,1,float16,fp8,0,0.03772266705830892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,0,1,fp8,fp8,0,0.035631999373435974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,128,1,float16,float16,0,0.0252960001428922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,0,1,float16,float16,0,0.03753600021203359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,128,1,float16,fp8,0,0.027349332968393963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,128,1,fp8,fp8,0,0.02565866708755493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,0,1,float16,fp8,0,0.03791466603676478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,0,1,fp8,fp8,0,0.037605332831541695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,128,1,float16,float16,0,0.027280000348885853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,0,1,float16,float16,0,0.03772266705830892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,128,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,128,1,fp8,fp8,0,0.026335999369621277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,0,1,float16,fp8,0,0.03808533400297165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,0,1,fp8,fp8,0,0.03710933278004328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,128,1,float16,float16,0,0.3986026843388875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,128,1,fp8,fp8,0,0.36750932534535724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,128,1,float16,fp8,0,0.3935679992039998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,0,1,float16,float16,0,0.47043200333913165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,0,1,float16,fp8,0,0.4673546552658081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,0,1,fp8,fp8,0,0.43166399002075195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,128,1,float16,float16,0,0.3999679883321126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,0,1,float16,float16,0,0.4720960060755412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,128,1,float16,fp8,0,0.4023360013961792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,128,1,fp8,fp8,0,0.37590932846069336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,0,1,float16,fp8,0,0.4697386821111043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,0,1,fp8,fp8,0,0.43938132127126056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,128,1,float16,float16,0,0.4127360184987386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,0,1,float16,float16,0,0.48376532395680744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,128,1,float16,fp8,0,0.40960534413655597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,128,1,fp8,fp8,0,0.3935626745223999
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,128,1,float16,float16,0,0.2153759996096293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,0,1,float16,fp8,0,0.4838080008824666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,0,1,fp8,fp8,0,0.4570080041885376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,0,1,float16,float16,0,0.254202663898468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,128,1,float16,fp8,0,0.21424533923467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,128,1,fp8,fp8,0,0.21683200200398764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,0,1,float16,fp8,0,0.25191466013590497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,0,1,fp8,fp8,0,0.25007466475168866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,128,1,float16,float16,0,0.2087306578954061
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,128,1,float16,fp8,0,0.20678400993347168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,0,1,float16,float16,0,0.24604799350102743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,128,1,fp8,fp8,0,0.19377599159876505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,0,1,fp8,fp8,0,0.22790932655334473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,0,1,float16,fp8,0,0.24554133415222168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,128,1,float16,float16,0,0.210314671198527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,128,1,float16,fp8,0,0.20973867177963257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,0,1,float16,float16,0,0.2480319937070211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,128,1,fp8,fp8,0,0.19755200544993082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,0,1,float16,fp8,0,0.2489759922027588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,0,1,fp8,fp8,0,0.2303253412246704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,128,1,float16,float16,0,0.21494932969411215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,128,1,float16,fp8,0,0.21572266022364298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,0,1,float16,float16,0,0.2547253370285034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,128,1,fp8,fp8,0,0.2047626574834188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,128,1,float16,float16,0,0.11979200442632039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,0,1,float16,fp8,0,0.2526026765505473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,0,1,fp8,fp8,0,0.23976532618204752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,0,1,float16,float16,0,0.1434879998366038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,128,1,float16,fp8,0,0.11641599734624226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,128,1,fp8,fp8,0,0.11986666917800903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,0,1,float16,fp8,0,0.14014400045077005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,0,1,fp8,fp8,0,0.13885333140691122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,128,1,float16,float16,0,0.11136000355084737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,0,1,float16,float16,0,0.1339413324991862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,128,1,float16,fp8,0,0.1116480032602946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,128,1,fp8,fp8,0,0.10482666889826457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,0,1,float16,fp8,0,0.1341600020726522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,0,1,fp8,fp8,0,0.12372266252835591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,128,1,float16,float16,0,0.11337600151697795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,128,1,float16,fp8,0,0.1120693286259969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,0,1,float16,float16,0,0.1365013321240743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,128,1,fp8,fp8,0,0.10916800300280254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,0,1,float16,fp8,0,0.1338879962762197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,0,1,fp8,fp8,0,0.12890133261680603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,128,1,float16,float16,0,0.11771200100580852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,128,1,float16,fp8,0,0.11754133303960164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,0,1,float16,float16,0,0.13948800166447958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,128,1,fp8,fp8,0,0.114138662815094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,128,1,float16,float16,0,0.06422399977842967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,0,1,float16,fp8,0,0.13987732927004495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,0,1,fp8,fp8,0,0.1316266655921936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,128,1,float16,fp8,0,0.06449066599210103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,0,1,float16,float16,0,0.0769706666469574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,128,1,fp8,fp8,0,0.06820799907048543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,0,1,float16,fp8,0,0.07839466631412506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,0,1,fp8,fp8,0,0.07923733194669087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,128,1,float16,float16,0,0.06381866832574208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,0,1,float16,float16,0,0.07679466903209686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,128,1,float16,fp8,0,0.06467733283837636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,128,1,fp8,fp8,0,0.06067200005054474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,0,1,float16,fp8,0,0.07604266703128815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,128,1,float16,float16,0,0.064410666624705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,0,1,fp8,fp8,0,0.07247466842333476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,0,1,float16,float16,0,0.07763200004895528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,128,1,float16,fp8,0,0.06427733103434245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,128,1,fp8,fp8,0,0.06065066655476888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,0,1,float16,fp8,0,0.0773119976123174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,0,1,fp8,fp8,0,0.07281599938869476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,128,1,float16,float16,0,0.06644266843795776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,0,1,float16,float16,0,0.07750933369000752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,128,1,float16,fp8,0,0.06637866795063019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,128,1,fp8,fp8,0,0.064560001095136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,0,1,float16,fp8,0,0.07867733140786488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,128,1,float16,float16,0,0.04121066629886627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,0,1,fp8,fp8,0,0.07446933289368947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,0,1,float16,float16,0,0.050928001602490745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,128,1,float16,fp8,0,0.04170133173465729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,128,1,fp8,fp8,0,0.040864000717798867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,0,1,float16,fp8,0,0.04990933338801066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,0,1,fp8,fp8,0,0.049786667029062905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,128,1,float16,float16,0,0.041663999358812966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,0,1,float16,float16,0,0.049882665276527405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,128,1,float16,fp8,0,0.0415786678592364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,128,1,fp8,fp8,0,0.03938666731119156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,0,1,float16,fp8,0,0.050053333242734276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,128,1,float16,float16,0,0.0414986660083135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,0,1,fp8,fp8,0,0.04791999856630961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,0,1,float16,float16,0,0.05013866722583771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,128,1,float16,fp8,0,0.04001600046952566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,128,1,fp8,fp8,0,0.040063999593257904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,0,1,float16,fp8,0,0.04985066751639048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,0,1,fp8,fp8,0,0.048112000028292336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,128,1,float16,float16,0,0.041722665230433144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,128,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,0,1,float16,float16,0,0.05186133086681366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,128,1,fp8,fp8,0,0.03946666667858759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,0,1,float16,fp8,0,0.050016000866889954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,128,1,float16,float16,0,0.027482666075229645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,0,1,fp8,fp8,0,0.04986133178075155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,0,1,float16,float16,0,0.033370666205883026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,128,1,float16,fp8,0,0.027509334186712902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,128,1,fp8,fp8,0,0.027210667729377747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,0,1,float16,fp8,0,0.033626665671666466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,0,1,fp8,fp8,0,0.033439998825391136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,128,1,float16,float16,0,0.027232001225153606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,0,1,float16,float16,0,0.03158933420976003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,128,1,float16,fp8,0,0.026789332429567974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,128,1,fp8,fp8,0,0.027189334233601887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,0,1,float16,fp8,0,0.03331733246644338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,0,1,fp8,fp8,0,0.03141333411137263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,128,1,float16,float16,0,0.025786665578683216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,0,1,float16,float16,0,0.03345600018898646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,128,1,float16,fp8,0,0.02740799884001414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,128,1,fp8,fp8,0,0.025536000728607178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,0,1,float16,fp8,0,0.03154666721820831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,0,1,fp8,fp8,0,0.032074667513370514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,128,1,float16,float16,0,0.02719466636578242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,0,1,float16,float16,0,0.03372266640265783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,128,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,128,1,fp8,fp8,0,0.027210667729377747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,0,1,float16,fp8,0,0.03378133227427801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,128,1,float16,float16,0,0.02312533309062322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,0,1,fp8,fp8,0,0.03335466732581457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,0,1,float16,float16,0,0.02959466725587845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,128,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,128,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,0,1,float16,fp8,0,0.029674666623274486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,0,1,fp8,fp8,0,0.029285334050655365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,128,1,float16,float16,0,0.023157333334287006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,0,1,float16,float16,0,0.02940266579389572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,128,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,128,1,fp8,fp8,0,0.02327999969323476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,0,1,float16,fp8,0,0.029535998900731403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,0,1,fp8,fp8,0,0.027456000447273254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,128,1,float16,float16,0,0.02349333216746648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,0,1,float16,float16,0,0.02922666569550832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,128,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,128,1,fp8,fp8,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,0,1,float16,fp8,0,0.029546665648619335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,0,1,fp8,fp8,0,0.02757866680622101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,128,1,float16,float16,0,0.0235359991590182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,0,1,float16,float16,0,0.029626667499542236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,128,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,128,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,0,1,float16,fp8,0,0.029274667302767437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,0,1,fp8,fp8,0,0.027248000105222065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,128,1,float16,float16,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,0,1,float16,float16,0,0.027610667049884796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,128,1,float16,fp8,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,128,1,fp8,fp8,0,0.02146133283774058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,0,1,float16,fp8,0,0.02754666656255722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,0,1,fp8,fp8,0,0.026586666703224182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,128,1,float16,float16,0,0.02149333308140437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,0,1,float16,float16,0,0.027189334233601887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,128,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,128,1,fp8,fp8,0,0.020997333029905956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,0,1,float16,fp8,0,0.027621333797772724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,0,1,fp8,fp8,0,0.027600000301996868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,128,1,float16,float16,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,0,1,float16,float16,0,0.027530667682488758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,128,1,float16,fp8,0,0.02178666740655899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,128,1,fp8,fp8,0,0.021407999098300934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,0,1,float16,fp8,0,0.02790933350721995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,0,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,128,1,float16,float16,0,0.021829334398110706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,0,1,float16,float16,0,0.0273333340883255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,128,1,float16,fp8,0,0.023290666441122692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,128,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,0,1,float16,fp8,0,0.027471999327341717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,0,1,fp8,fp8,0,0.02537599951028824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,128,1,float16,float16,0,0.3897013266881307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,0,1,float16,float16,0,0.39363733927408856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,128,1,float16,fp8,0,0.3842720190684001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,128,1,fp8,fp8,0,0.35949865976969403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,0,1,float16,fp8,0,0.39109333356221515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,0,1,fp8,fp8,0,0.363813320795695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,128,1,float16,float16,0,0.3933653434117635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,0,1,float16,float16,0,0.39829333623250324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,128,1,float16,fp8,0,0.3909173409144084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,128,1,fp8,fp8,0,0.36607468128204346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,0,1,float16,fp8,0,0.39804800351460773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,0,1,fp8,fp8,0,0.36979734897613525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,128,1,float16,float16,0,0.40276801586151123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,0,1,float16,float16,0,0.4118293523788452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,128,1,float16,fp8,0,0.4052160183588664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,128,1,fp8,fp8,0,0.38229866822560626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,0,1,float16,fp8,0,0.4092426697413127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,128,1,float16,float16,0,0.21215466658274332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,0,1,float16,float16,0,0.21387734015782675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,128,1,float16,fp8,0,0.20795732736587524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,0,1,fp8,fp8,0,0.3842879931131999
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,0,1,float16,fp8,0,0.21176000436147055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,128,1,fp8,fp8,0,0.21134400367736816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,0,1,fp8,fp8,0,0.21374932924906412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,128,1,float16,float16,0,0.2037013371785482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,0,1,float16,float16,0,0.20566399892171225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,128,1,float16,fp8,0,0.20204800367355347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,128,1,fp8,fp8,0,0.1888479987780253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,0,1,float16,fp8,0,0.2054133415222168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,0,1,fp8,fp8,0,0.19139200448989868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,128,1,float16,float16,0,0.20641599098841348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,0,1,float16,float16,0,0.20870399475097656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,128,1,float16,fp8,0,0.20495466391245523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,128,1,fp8,fp8,0,0.19150932629903158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,0,1,float16,fp8,0,0.20786132415135702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,0,1,fp8,fp8,0,0.19435199101765951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,128,1,float16,float16,0,0.21040000518163046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,0,1,float16,float16,0,0.21459199984868368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,128,1,float16,fp8,0,0.2097919980684916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,128,1,fp8,fp8,0,0.19982399543126425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,0,1,float16,fp8,0,0.21388800938924155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,128,1,float16,float16,0,0.11774399876594543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,0,1,fp8,fp8,0,0.20137600104014078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,0,1,float16,float16,0,0.1200213332970937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,128,1,float16,fp8,0,0.11404800415039062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,128,1,fp8,fp8,0,0.11808533469835918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,0,1,float16,fp8,0,0.11761066317558289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,0,1,fp8,fp8,0,0.11913599570592244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,128,1,float16,float16,0,0.1102186640103658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,0,1,float16,float16,0,0.11201600233713786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,128,1,float16,fp8,0,0.10954667131106059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,128,1,fp8,fp8,0,0.1034879982471466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,0,1,float16,fp8,0,0.11192533373832703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,0,1,fp8,fp8,0,0.10479999581972758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,128,1,float16,float16,0,0.11194133758544922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,0,1,float16,float16,0,0.11332266529401143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,128,1,float16,fp8,0,0.11217600107192993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,128,1,fp8,fp8,0,0.107205331325531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,0,1,float16,fp8,0,0.11339199542999268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,128,1,float16,float16,0,0.11525866389274597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,0,1,fp8,fp8,0,0.10857066512107849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,0,1,float16,float16,0,0.11824533343315125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,128,1,float16,fp8,0,0.11556800206502278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,128,1,fp8,fp8,0,0.11174933115641277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,0,1,float16,fp8,0,0.11729600032170613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,128,1,float16,float16,0,0.06417066852251689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,0,1,fp8,fp8,0,0.1128480037053426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,0,1,float16,float16,0,0.06447466711203258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,128,1,fp8,fp8,0,0.0665226678053538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,128,1,float16,fp8,0,0.06326400240262349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,0,1,float16,fp8,0,0.0644160012404124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,0,1,fp8,fp8,0,0.06657599906126659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,128,1,float16,float16,0,0.0629066675901413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,0,1,float16,float16,0,0.06229333579540253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,128,1,float16,fp8,0,0.06267199913660686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,128,1,fp8,fp8,0,0.058543999989827476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,0,1,float16,fp8,0,0.0624533345301946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,0,1,fp8,fp8,0,0.06006933252016703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,128,1,float16,float16,0,0.06282133360703786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,0,1,float16,float16,0,0.06408533453941345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,128,1,float16,fp8,0,0.06376533210277557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,128,1,fp8,fp8,0,0.060453335444132485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,0,1,float16,fp8,0,0.06433066725730896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,0,1,fp8,fp8,0,0.060362666845321655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,0,1,float16,float16,0,0.06459733347098033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,128,1,float16,float16,0,0.06465066472689311
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,128,1,float16,fp8,0,0.0643093337615331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,128,1,fp8,fp8,0,0.06211733321348826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,0,1,float16,fp8,0,0.06637333333492279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,0,1,fp8,fp8,0,0.06225599845250448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,128,1,float16,float16,0,0.04163199911514918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,128,1,float16,fp8,0,0.039818666875362396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,0,1,float16,float16,0,0.04345066845417023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,128,1,fp8,fp8,0,0.042549331982930504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,0,1,float16,fp8,0,0.042021334171295166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,0,1,fp8,fp8,0,0.0414986660083135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,128,1,float16,float16,0,0.04001600046952566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,128,1,float16,fp8,0,0.04161600023508072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,128,1,fp8,fp8,0,0.03977599988381068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,0,1,float16,float16,0,0.042538667718569435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,0,1,float16,fp8,0,0.04188266893227895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,0,1,fp8,fp8,0,0.04035199930270513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,128,1,float16,float16,0,0.04159999887148539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,128,1,float16,fp8,0,0.040378667414188385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,0,1,float16,float16,0,0.0422986646493276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,128,1,fp8,fp8,0,0.03965333352486292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,0,1,float16,fp8,0,0.04204266766707102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,0,1,fp8,fp8,0,0.040021332601706185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,128,1,float16,float16,0,0.04180799921353658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,0,1,float16,float16,0,0.04154666761557261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,128,1,float16,fp8,0,0.041562666495641075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,128,1,fp8,fp8,0,0.04201066493988037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,0,1,float16,fp8,0,0.04460800190766653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,0,1,fp8,fp8,0,0.04162133236726125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,128,1,float16,float16,0,0.027232001225153606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,0,1,float16,float16,0,0.029018667836983997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,128,1,fp8,fp8,0,0.02718399961789449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,128,1,float16,fp8,0,0.02749866743882497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,0,1,fp8,fp8,0,0.02755733331044515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,128,1,float16,float16,0,0.027162666122118633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,0,1,float16,float16,0,0.027445333699385326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,128,1,float16,fp8,0,0.026234666506449383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,128,1,fp8,fp8,0,0.025519999365011852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,0,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,0,1,fp8,fp8,0,0.027424000203609467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,128,1,float16,float16,0,0.027482666075229645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,0,1,float16,float16,0,0.027888000011444092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,128,1,float16,fp8,0,0.026543999711672466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,128,1,fp8,fp8,0,0.027456000447273254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,0,1,float16,fp8,0,0.027514666318893433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,0,1,fp8,fp8,0,0.026170666019121807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,128,1,float16,float16,0,0.027647999425729115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,0,1,float16,float16,0,0.027813332776228588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,128,1,float16,fp8,0,0.0272533322374026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,128,1,fp8,fp8,0,0.027488000690937042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,0,1,float16,fp8,0,0.02926933268706004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,0,1,fp8,fp8,0,0.027232001225153606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,128,1,float16,float16,0,0.023152001202106476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,128,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,128,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,0,1,float16,fp8,0,0.023647998770078022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,0,1,fp8,fp8,0,0.023525332411130268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,128,1,float16,float16,0,0.023050665855407715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,0,1,float16,float16,0,0.023775999744733173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,128,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,128,1,fp8,fp8,0,0.021370666722456615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,0,1,float16,fp8,0,0.02333866556485494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,0,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,128,1,float16,float16,0,0.023152001202106476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,0,1,float16,float16,0,0.02348266790310542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,128,1,float16,fp8,0,0.02309333284695943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,128,1,fp8,fp8,0,0.021290667355060577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,0,1,float16,fp8,0,0.023445333043734234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,0,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,128,1,float16,float16,0,0.023333333432674408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,0,1,float16,float16,0,0.023455999791622162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,128,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,128,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,0,1,float16,fp8,0,0.023669332265853882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,0,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,128,1,float16,float16,0,0.02160533269246419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,0,1,float16,float16,0,0.02348266790310542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,128,1,float16,fp8,0,0.022800001005331676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,128,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,0,1,float16,fp8,0,0.02349333216746648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,0,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,128,1,float16,float16,0,0.021456000705560047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,0,1,float16,float16,0,0.02330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,128,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,128,1,fp8,fp8,0,0.021407999098300934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,0,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,128,1,float16,float16,0,0.02120000123977661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,0,1,float16,float16,0,0.021642667551835377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,128,1,float16,fp8,0,0.021701333423455555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,128,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,0,1,float16,fp8,0,0.021530665457248688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,0,1,fp8,fp8,0,0.021386665602525074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,128,1,float16,float16,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,0,1,float16,float16,0,0.021365332106749218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,128,1,float16,fp8,0,0.022511998812357586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,128,1,fp8,fp8,0,0.02144533395767212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,0,1,float16,fp8,0,0.022218666970729828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,0,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,128,1,float16,float16,0,0.021370666722456615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,0,1,float16,float16,0,0.023056000471115112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,128,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,128,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,0,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,0,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,128,1,float16,float16,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,0,1,float16,float16,0,0.02090666691462199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,128,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,128,1,fp8,fp8,0,0.02120000123977661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,0,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,0,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,128,1,float16,float16,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,0,1,float16,float16,0,0.02142400046189626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,128,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,128,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,0,1,float16,fp8,0,0.022890667120615642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,128,1,float16,float16,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,0,1,float16,float16,0,0.022181332111358643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,128,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,128,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,0,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,128,1,float16,float16,0,0.18921067317326865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,128,1,float16,fp8,0,0.18726933002471924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,0,1,float16,float16,0,0.18508267402648926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,128,1,fp8,fp8,0,0.17733333508173624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,0,1,float16,fp8,0,0.18321067094802856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,0,1,fp8,fp8,0,0.17137600978215536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,128,1,float16,float16,0,0.19075733423233032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,0,1,float16,float16,0,0.18756800889968872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,128,1,float16,fp8,0,0.18920532862345377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,128,1,fp8,fp8,0,0.17670400937398276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,0,1,float16,fp8,0,0.1866719921429952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,0,1,fp8,fp8,0,0.17880533138910928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,128,1,float16,float16,0,0.19856532414754233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,0,1,float16,float16,0,0.1946666638056437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,128,1,float16,fp8,0,0.19520533084869385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,128,1,fp8,fp8,0,0.1874826749165853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,128,1,float16,float16,0,0.10663466652234395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,0,1,float16,float16,0,0.10582933823267619
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,0,1,float16,fp8,0,0.19311465819676718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,0,1,fp8,fp8,0,0.18305599689483643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,128,1,float16,fp8,0,0.10550933082898457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,128,1,fp8,fp8,0,0.1079200009504954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,0,1,float16,fp8,0,0.10462400317192078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,0,1,fp8,fp8,0,0.1042133371035258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,0,1,float16,float16,0,0.0993226667245229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,128,1,float16,float16,0,0.1016319990158081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,128,1,float16,fp8,0,0.09992000460624695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,128,1,fp8,fp8,0,0.09564800063769023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,0,1,float16,fp8,0,0.09842666983604431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,0,1,fp8,fp8,0,0.09314666191736858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,128,1,float16,float16,0,0.1033066709836324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,0,1,float16,float16,0,0.09943999846776326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,128,1,float16,fp8,0,0.1011786659558614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,128,1,fp8,fp8,0,0.09921600421269734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,0,1,float16,fp8,0,0.09930133819580078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,0,1,fp8,fp8,0,0.09552533427874248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,128,1,float16,float16,0,0.10609066486358643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,0,1,float16,float16,0,0.10533866286277771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,128,1,float16,fp8,0,0.10538132985432942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,128,1,fp8,fp8,0,0.10315199693044026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,0,1,float16,fp8,0,0.10412266850471497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,0,1,fp8,fp8,0,0.10056533416112264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,128,1,float16,float16,0,0.06015466650327047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,0,1,float16,float16,0,0.06027733286221822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,128,1,float16,fp8,0,0.06006933252016703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,128,1,fp8,fp8,0,0.062447999914487205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,0,1,float16,fp8,0,0.05842666824658712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,0,1,fp8,fp8,0,0.06214400132497152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,128,1,float16,float16,0,0.060047999024391174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,0,1,float16,float16,0,0.05823466678460439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,128,1,float16,fp8,0,0.05798399945100149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,128,1,fp8,fp8,0,0.05620799958705902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,0,1,float16,fp8,0,0.056602666775385536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,0,1,fp8,fp8,0,0.05436266462008158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,128,1,float16,float16,0,0.060421332716941833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,128,1,float16,fp8,0,0.058245331048965454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,0,1,float16,float16,0,0.05845866600672404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,128,1,fp8,fp8,0,0.05593066910902659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,0,1,float16,fp8,0,0.058058664202690125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,0,1,fp8,fp8,0,0.054671997825304665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,128,1,float16,float16,0,0.05993066728115082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,0,1,float16,float16,0,0.05789333085219065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,128,1,float16,fp8,0,0.060266668597857155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,128,1,fp8,fp8,0,0.05826666454474131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,0,1,float16,fp8,0,0.058245331048965454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,0,1,fp8,fp8,0,0.05648533503214518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,128,1,float16,float16,0,0.03754133234421412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,0,1,float16,float16,0,0.03596800069014231
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,128,1,float16,fp8,0,0.03765333443880081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,128,1,fp8,fp8,0,0.03757333258787791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,0,1,float16,fp8,0,0.03612799942493439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,0,1,fp8,fp8,0,0.037690666814645134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,128,1,float16,float16,0,0.03787733366092046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,0,1,float16,float16,0,0.037477334340413414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,128,1,float16,fp8,0,0.0378560001651446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,128,1,fp8,fp8,0,0.03770133356253306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,0,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,0,1,fp8,fp8,0,0.03566399961709976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,128,1,float16,float16,0,0.037946666280428566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,0,1,float16,float16,0,0.037621334195137024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,128,1,float16,fp8,0,0.038021333515644073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,128,1,fp8,fp8,0,0.035829332967599235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,0,1,float16,fp8,0,0.03755199909210205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,0,1,fp8,fp8,0,0.03586133321126302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,128,1,float16,float16,0,0.03989866624275843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,128,1,float16,fp8,0,0.03771200031042099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,128,1,fp8,fp8,0,0.038005332152048744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,0,1,float16,fp8,0,0.037445334096749626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,0,1,fp8,fp8,0,0.03631466627120972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,128,1,float16,float16,0,0.025349333882331848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,0,1,float16,float16,0,0.025216000775496166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,128,1,fp8,fp8,0,0.025349333882331848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,0,1,float16,fp8,0,0.025519999365011852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,128,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,0,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,128,1,float16,float16,0,0.025050667424996693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,0,1,float16,float16,0,0.025248001019159954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,128,1,fp8,fp8,0,0.025093334416548412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,128,1,float16,fp8,0,0.026399999856948853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,0,1,float16,fp8,0,0.025455998877684276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,0,1,fp8,fp8,0,0.02350933353106181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,128,1,float16,float16,0,0.025413334369659424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,0,1,float16,float16,0,0.025466665625572205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,128,1,float16,fp8,0,0.025413334369659424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,128,1,fp8,fp8,0,0.025349333882331848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,0,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,128,1,float16,float16,0,0.025263999899228413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,0,1,float16,float16,0,0.025253333151340485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,0,1,fp8,fp8,0,0.024133334557215374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,128,1,float16,fp8,0,0.025216000775496166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,128,1,fp8,fp8,0,0.025424001117547352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,0,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,128,1,float16,float16,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,0,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,0,1,float16,float16,0,0.021530665457248688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,128,1,float16,fp8,0,0.02181866765022278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,128,1,fp8,fp8,0,0.021290667355060577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,0,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,0,1,fp8,fp8,0,0.021402666966120403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,128,1,float16,float16,0,0.02102400114138921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,0,1,float16,float16,0,0.02126399924357732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,128,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,128,1,fp8,fp8,0,0.021562665700912476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,0,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,0,1,float16,fp8,0,0.02149333308140437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,128,1,float16,float16,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,0,1,float16,float16,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,128,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,128,1,float16,fp8,0,0.02147199958562851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,0,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,128,1,float16,float16,0,0.021381333470344543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,0,1,float16,float16,0,0.021013334393501282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,128,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,128,1,fp8,fp8,0,0.022154666483402252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,0,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,0,1,fp8,fp8,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,0,1,float16,float16,0,0.02053333322207133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,128,1,float16,float16,0,0.02162666618824005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,128,1,float16,fp8,0,0.01940800001223882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,128,1,fp8,fp8,0,0.020938667158285778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,0,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,128,1,float16,float16,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,0,1,float16,float16,0,0.019941333681344986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,128,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,128,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,128,1,float16,float16,0,0.01923199991385142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,128,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,128,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,0,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,0,1,fp8,fp8,0,0.019567999988794327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,128,1,float16,float16,0,0.020901332298914593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,128,1,float16,fp8,0,0.01937599976857503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,128,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,0,1,float16,fp8,0,0.020746666938066483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,0,1,fp8,fp8,0,0.019823999454577763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,128,1,float16,float16,0,0.01926933353145917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,0,1,float16,float16,0,0.019152000546455383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,128,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,128,1,fp8,fp8,0,0.018800000349680584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,128,1,float16,float16,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,128,1,float16,fp8,0,0.019199999670187633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,0,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,128,1,float16,float16,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,128,1,float16,fp8,0,0.019685332973798115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,128,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,0,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,128,1,float16,float16,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,0,1,float16,float16,0,0.01930133377512296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,128,1,float16,fp8,0,0.019952000429232914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,0,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,0,1,fp8,fp8,0,0.018245333184798557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,128,1,float16,float16,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,0,1,float16,float16,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,128,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,128,1,fp8,fp8,0,0.019466667125622433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,0,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,128,1,float16,float16,0,0.019167999426523846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,0,1,float16,float16,0,0.019226666539907455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,128,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,128,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,0,1,fp8,fp8,0,0.01882133384545644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,128,1,float16,float16,0,0.01889066646496455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,0,1,float16,float16,0,0.019013332823912304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,128,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,128,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,0,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,128,1,float16,float16,0,0.019493332753578823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,0,1,float16,float16,0,0.01923199991385142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,128,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,128,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,0,1,fp8,fp8,0,0.017786666750907898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,128,1,float16,float16,0,0.1067626674969991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,0,1,float16,float16,0,0.10544000069300334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,128,1,float16,fp8,0,0.10552000006039937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,128,1,fp8,fp8,0,0.10087466239929199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,0,1,float16,fp8,0,0.10549333691596985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,0,1,fp8,fp8,0,0.10155733426411946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,128,1,float16,float16,0,0.10714667042096455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,0,1,float16,float16,0,0.10738666852315266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,128,1,float16,fp8,0,0.10733866691589355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,128,1,fp8,fp8,0,0.10014399886131287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,0,1,float16,fp8,0,0.10777067144711812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,0,1,fp8,fp8,0,0.10335466265678406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,128,1,float16,float16,0,0.111653337876002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,0,1,float16,float16,0,0.11147200067838033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,128,1,float16,fp8,0,0.10987200339635213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,128,1,fp8,fp8,0,0.10686933000882466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,0,1,fp8,fp8,0,0.10564800103505452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,0,1,float16,fp8,0,0.10844266414642334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,128,1,float16,float16,0,0.061573331554730736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,0,1,float16,float16,0,0.060047999024391174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,128,1,float16,fp8,0,0.060533334811528526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,128,1,fp8,fp8,0,0.06248533229033152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,0,1,float16,fp8,0,0.05952000121275584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,0,1,fp8,fp8,0,0.0644160012404124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,128,1,float16,float16,0,0.05815466741720835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,0,1,float16,float16,0,0.05789333085219065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,128,1,float16,fp8,0,0.05866133173306783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,128,1,fp8,fp8,0,0.05473066866397858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,0,1,float16,fp8,0,0.058149332801500954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,0,1,fp8,fp8,0,0.056015998125076294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,128,1,float16,float16,0,0.058431997895240784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,0,1,float16,float16,0,0.05997333427270254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,128,1,float16,fp8,0,0.059792002042134605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,128,1,fp8,fp8,0,0.055770665407180786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,0,1,float16,fp8,0,0.05821333328882853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,128,1,float16,float16,0,0.06052266558011373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,0,1,fp8,fp8,0,0.055162668228149414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,0,1,float16,float16,0,0.06044800082842509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,128,1,float16,fp8,0,0.06044266621271769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,128,1,fp8,fp8,0,0.05923733115196228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,0,1,float16,fp8,0,0.05827199916044871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,128,1,float16,float16,0,0.03932799895604452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,0,1,fp8,fp8,0,0.05823466678460439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,0,1,float16,float16,0,0.03762666632731756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,128,1,float16,fp8,0,0.03921066721280416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,128,1,fp8,fp8,0,0.039690665900707245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,0,1,float16,fp8,0,0.03946666667858759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,0,1,fp8,fp8,0,0.039781334499518074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,128,1,float16,float16,0,0.03974399964014689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,0,1,float16,float16,0,0.03957866628964742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,128,1,float16,fp8,0,0.039408000806967415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,128,1,fp8,fp8,0,0.037632000943024956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,0,1,float16,fp8,0,0.03745066622893015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,0,1,fp8,fp8,0,0.03603200117746989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,128,1,float16,float16,0,0.0397173340121905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,128,1,float16,fp8,0,0.03824000060558319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,128,1,fp8,fp8,0,0.037647999823093414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,0,1,float16,fp8,0,0.03794133414824804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,0,1,fp8,fp8,0,0.03758399933576584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,128,1,float16,float16,0,0.03982933362325033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,0,1,float16,float16,0,0.03953066716591517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,128,1,fp8,fp8,0,0.038378665844599404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,128,1,float16,fp8,0,0.03982933362325033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,0,1,float16,fp8,0,0.039701332648595176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,128,1,float16,float16,0,0.027450665831565857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,0,1,fp8,fp8,0,0.03777066618204117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,0,1,float16,float16,0,0.027461332579453785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,128,1,float16,fp8,0,0.027376001079877216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,128,1,fp8,fp8,0,0.025493333737055462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,0,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,0,1,fp8,fp8,0,0.025722667574882507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,128,1,float16,float16,0,0.026176000634829204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,0,1,float16,float16,0,0.025472000241279602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,128,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,0,1,float16,fp8,0,0.027461332579453785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,128,1,float16,fp8,0,0.025936000049114227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,0,1,fp8,fp8,0,0.02516799916823705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,0,1,float16,float16,0,0.02716800073782603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,128,1,float16,float16,0,0.027295999228954315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,128,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,128,1,float16,fp8,0,0.02741333345572154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,0,1,float16,fp8,0,0.027349332968393963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,0,1,fp8,fp8,0,0.025349333882331848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,128,1,float16,float16,0,0.025216000775496166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,128,1,float16,fp8,0,0.02555199960867564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,0,1,float16,float16,0,0.026549334327379864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,128,1,fp8,fp8,0,0.02568000058333079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,0,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,0,1,fp8,fp8,0,0.025786665578683216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,128,1,float16,float16,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,128,1,float16,fp8,0,0.01952533299724261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,128,1,fp8,fp8,0,0.019621333728233974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,0,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,128,1,float16,float16,0,0.019280000279347103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,0,1,fp8,fp8,0,0.019733333339293797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,0,1,float16,float16,0,0.019253333409627277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,128,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,0,1,float16,fp8,0,0.019466667125622433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,0,1,fp8,fp8,0,0.018810667097568512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,128,1,float16,float16,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,0,1,float16,float16,0,0.019098666807015736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,128,1,float16,fp8,0,0.01953599974513054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,128,1,fp8,fp8,0,0.018800000349680584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,0,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,0,1,float16,float16,0,0.01894933357834816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,128,1,float16,float16,0,0.019424000134070713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,128,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,128,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,0,1,float16,fp8,0,0.01926933353145917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,0,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,0,1,float16,float16,0,0.01878400022784869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,128,1,float16,float16,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,128,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,0,1,float16,float16,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,128,1,float16,float16,0,0.017456000049908955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,0,1,float16,fp8,0,0.01844266677896182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,128,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,128,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,128,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,128,1,float16,float16,0,0.016821333517630894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,128,1,float16,fp8,0,0.017984000345071156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,0,1,float16,float16,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,128,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,128,1,float16,float16,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,128,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,128,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,0,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,128,1,float16,float16,0,0.019482667247454327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,128,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,128,1,float16,float16,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,128,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,128,1,float16,float16,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,128,1,float16,fp8,0,0.017525333911180496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,128,1,fp8,fp8,0,0.017450666675964992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,128,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,128,1,float16,float16,0,0.017349333812793095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,128,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,0,1,fp8,fp8,0,0.016095999628305435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,128,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,128,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,128,1,fp8,fp8,0,0.01646399994691213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,128,1,float16,float16,0,0.016821333517630894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,0,1,float16,float16,0,0.01643199970324834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,128,1,float16,fp8,0,0.017642666896184284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,128,1,fp8,fp8,0,0.01653333380818367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,128,1,float16,float16,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,128,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,128,1,float16,float16,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,0,1,float16,float16,0,0.016693333784739178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,128,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,128,1,float16,float16,0,0.07649066547552745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,0,1,float16,float16,0,0.07723199824492137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,128,1,float16,fp8,0,0.07649600009123485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,128,1,fp8,fp8,0,0.0724480003118515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,0,1,fp8,fp8,0,0.07077333331108093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,0,1,float16,fp8,0,0.07727999985218048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,128,1,float16,float16,0,0.078575998544693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,0,1,float16,float16,0,0.07840533554553986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,128,1,fp8,fp8,0,0.07225066423416138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,128,1,float16,fp8,0,0.07764266431331635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,0,1,float16,fp8,0,0.07671999931335449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,0,1,fp8,fp8,0,0.0719413310289383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,128,1,float16,float16,0,0.07879466811815898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,0,1,float16,float16,0,0.07944533228874207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,128,1,float16,fp8,0,0.07679999868075053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,128,1,fp8,fp8,0,0.07495999832948048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,0,1,float16,fp8,0,0.07858133316040039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,0,1,fp8,fp8,0,0.0749066670735677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,128,1,float16,float16,0,0.046037331223487854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,0,1,float16,float16,0,0.04584000011285146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,128,1,float16,fp8,0,0.04619733492533366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,128,1,fp8,fp8,0,0.04619733492533366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,0,1,float16,fp8,0,0.04603200157483419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,0,1,fp8,fp8,0,0.046021332343419395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,0,1,float16,float16,0,0.045994664231936135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,128,1,float16,float16,0,0.04618666569391886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,128,1,float16,fp8,0,0.0469706654548645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,128,1,fp8,fp8,0,0.04399999976158142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,0,1,float16,fp8,0,0.04614399870236715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,0,1,fp8,fp8,0,0.04397333165009817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,128,1,float16,float16,0,0.046112000942230225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,0,1,float16,float16,0,0.045925334095954895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,128,1,float16,fp8,0,0.04781866570313772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,128,1,fp8,fp8,0,0.04382933179537455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,0,1,float16,fp8,0,0.04584533472855886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,0,1,fp8,fp8,0,0.04387199878692627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,128,1,float16,float16,0,0.046053335070610046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,0,1,float16,float16,0,0.04619200030962626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,128,1,float16,fp8,0,0.046800002455711365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,128,1,fp8,fp8,0,0.04587199787298838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,0,1,float16,fp8,0,0.04784533381462097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,0,1,fp8,fp8,0,0.04610133171081543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,128,1,float16,float16,0,0.03182933231194814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,0,1,float16,float16,0,0.031354665756225586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,128,1,float16,fp8,0,0.03159466634194056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,128,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,0,1,float16,fp8,0,0.03145066648721695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,0,1,fp8,fp8,0,0.030016000072161358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,128,1,float16,float16,0,0.03130666663249334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,0,1,float16,float16,0,0.029669334491093952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,128,1,float16,fp8,0,0.029546665648619335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,128,1,fp8,fp8,0,0.029477333029111225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,0,1,float16,fp8,0,0.031354665756225586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,0,1,fp8,fp8,0,0.02917333443959554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,128,1,float16,float16,0,0.030026666820049286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,0,1,float16,float16,0,0.031343999008337654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,128,1,float16,fp8,0,0.031712000568707786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,128,1,fp8,fp8,0,0.029279999434947968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,0,1,float16,fp8,0,0.031258667508761086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,0,1,fp8,fp8,0,0.029279999434947968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,0,1,float16,float16,0,0.031632001201311745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,128,1,float16,float16,0,0.03183466692765554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,128,1,float16,fp8,0,0.031317333380381264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,128,1,fp8,fp8,0,0.03159466634194056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,128,1,float16,float16,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,0,1,float16,float16,0,0.021375998854637146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,0,1,float16,fp8,0,0.03180266668399175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,0,1,fp8,fp8,0,0.02943466603755951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,128,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,128,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,0,1,float16,fp8,0,0.02306666721900304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,0,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,128,1,float16,float16,0,0.02143999934196472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,0,1,float16,float16,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,128,1,float16,fp8,0,0.022805333137512207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,128,1,fp8,fp8,0,0.021407999098300934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,0,1,float16,fp8,0,0.02142400046189626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,0,1,fp8,fp8,0,0.02080533280968666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,128,1,float16,float16,0,0.02149333308140437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,128,1,float16,fp8,0,0.022389332453409832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,128,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,0,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,0,1,float16,float16,0,0.022831998765468597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,128,1,float16,float16,0,0.021914665897687275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,128,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,128,1,fp8,fp8,0,0.02142400046189626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,0,1,float16,fp8,0,0.02258133391539256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,0,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,128,1,float16,float16,0,0.017445333302021027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,128,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,0,1,fp8,fp8,0,0.018272000054518383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,0,1,float16,float16,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,128,1,float16,fp8,0,0.017818666994571686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,128,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,128,1,float16,float16,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,128,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,128,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,0,1,float16,fp8,0,0.018592000007629395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,0,1,fp8,fp8,0,0.017935999979575474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,128,1,float16,fp8,0,0.018826667219400406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,128,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,0,1,fp8,fp8,0,0.018122666825850803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,0,1,float16,float16,0,0.015909332782030106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,128,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,128,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,128,1,float16,fp8,0,0.017616000026464462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,0,1,fp8,fp8,0,0.017423999806245167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,128,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,128,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,128,1,float16,float16,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,128,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,128,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,0,1,float16,float16,0,0.015376000354687372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,128,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,128,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,128,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,128,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,128,1,float16,float16,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,128,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,128,1,float16,float16,0,0.016789333273967106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,128,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,128,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,128,1,float16,float16,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,128,1,fp8,fp8,0,0.015967999895413715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,0,1,float16,float16,0,0.016634666671355564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,128,1,fp8,fp8,0,0.016165333489576977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,128,1,float16,float16,0,0.06638933221499126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,128,1,float16,fp8,0,0.06454400221506755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,0,1,float16,float16,0,0.06495466828346252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,128,1,fp8,fp8,0,0.06020799775918325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,0,1,float16,fp8,0,0.06404266754786174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,0,1,fp8,fp8,0,0.0606826643149058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,128,1,float16,float16,0,0.06425599753856659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,0,1,float16,float16,0,0.06428266565004985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,128,1,float16,fp8,0,0.06457066535949707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,128,1,fp8,fp8,0,0.06006399790445963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,0,1,float16,fp8,0,0.0644053320089976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,0,1,fp8,fp8,0,0.060773332913716636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,128,1,float16,float16,0,0.0645066648721695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,0,1,float16,float16,0,0.06629866858323415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,128,1,fp8,fp8,0,0.06232533355553945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,128,1,float16,fp8,0,0.0668213317791621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,0,1,float16,fp8,0,0.0642986645301183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,128,1,float16,float16,0,0.04004266609748205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,0,1,float16,float16,0,0.03758399933576584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,0,1,fp8,fp8,0,0.062352001667022705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,128,1,float16,fp8,0,0.03979733337958654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,128,1,fp8,fp8,0,0.03753600021203359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,0,1,float16,fp8,0,0.039781334499518074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,128,1,float16,float16,0,0.037791999677817024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,0,1,fp8,fp8,0,0.0377813329299291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,0,1,float16,float16,0,0.03959999978542328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,128,1,float16,fp8,0,0.03775466730197271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,128,1,fp8,fp8,0,0.03760000069936117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,0,1,float16,fp8,0,0.037733333806196846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,0,1,fp8,fp8,0,0.03568000098069509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,128,1,float16,float16,0,0.039674667020638786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,0,1,float16,float16,0,0.03761066744724909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,128,1,float16,fp8,0,0.037978666524092354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,128,1,fp8,fp8,0,0.035530666510264076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,0,1,float16,fp8,0,0.03946666667858759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,0,1,fp8,fp8,0,0.03773866593837738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,128,1,float16,float16,0,0.03879466652870178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,0,1,float16,float16,0,0.03969600051641464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,128,1,float16,fp8,0,0.03972266614437103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,128,1,fp8,fp8,0,0.03759466608365377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,0,1,float16,fp8,0,0.039818666875362396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,0,1,fp8,fp8,0,0.03743999948104223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,128,1,float16,float16,0,0.027285332481066387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,0,1,float16,float16,0,0.027093333502610523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,128,1,float16,fp8,0,0.027119999130566914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,128,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,0,1,float16,fp8,0,0.027376001079877216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,0,1,fp8,fp8,0,0.027280000348885853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,128,1,float16,float16,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,0,1,float16,float16,0,0.02555199960867564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,128,1,float16,fp8,0,0.027552001178264618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,128,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,0,1,float16,fp8,0,0.027237333357334137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,128,1,float16,float16,0,0.027189334233601887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,0,1,float16,float16,0,0.027376001079877216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,0,1,fp8,fp8,0,0.026202666262785595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,128,1,float16,fp8,0,0.027050666511058807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,0,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,128,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,128,1,float16,float16,0,0.026378666361172993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,0,1,fp8,fp8,0,0.025834667185942333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,0,1,float16,float16,0,0.026863999664783478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,128,1,float16,fp8,0,0.02722666660944621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,128,1,fp8,fp8,0,0.025727999707063038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,0,1,float16,fp8,0,0.027776000400384266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,128,1,float16,float16,0,0.01942933350801468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,0,1,float16,float16,0,0.021040000021457672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,0,1,fp8,fp8,0,0.027424000203609467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,128,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,128,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,0,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,0,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,128,1,float16,float16,0,0.01904533306757609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,128,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,128,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,0,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,0,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,128,1,float16,float16,0,0.020997333029905956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,0,1,float16,float16,0,0.01937066639463107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,128,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,128,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,0,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,128,1,float16,float16,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,0,1,float16,float16,0,0.021253332495689392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,128,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,128,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,0,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,0,1,fp8,fp8,0,0.020954666038354237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,128,1,float16,float16,0,0.017375999440749485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,0,1,float16,float16,0,0.01626666635274887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,128,1,float16,fp8,0,0.01670933390657107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,128,1,fp8,fp8,0,0.01754666616519292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,128,1,float16,float16,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,128,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,0,1,float16,fp8,0,0.01676799977819125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,128,1,float16,float16,0,0.01685333376129468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,128,1,fp8,fp8,0,0.016693333784739178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,0,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,128,1,float16,float16,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,0,1,float16,float16,0,0.015552000453074774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,128,1,fp8,fp8,0,0.01747200017174085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,128,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,128,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,128,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,128,1,float16,float16,0,0.01543466622630755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,128,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,0,1,float16,fp8,0,0.016496000190575916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,128,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,128,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,128,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,0,1,fp8,fp8,0,0.015487999965747198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,128,1,float16,float16,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,128,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,128,1,float16,float16,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,128,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,128,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,128,1,float16,fp8,0,0.015471999843915304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,128,1,fp8,fp8,0,0.015461333096027374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,0,1,fp8,fp8,0,0.015493333339691162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,128,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,0,1,float16,fp8,0,0.015546667079130808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,128,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,128,1,float16,float16,0,0.01544533297419548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,128,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,128,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,0,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,128,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,0,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,128,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,0,1,fp8,fp8,0,0.015743999431530636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,128,1,float16,float16,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,0,1,float16,float16,0,0.016741332908471424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,128,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,0,1,float16,float16,0,0.01762666677435239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,128,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,0,1,float16,fp8,0,0.015605332950750986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,0,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,128,1,float16,float16,0,0.05649599929650625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,0,1,float16,float16,0,0.05614933371543884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,128,1,float16,fp8,0,0.05861866474151611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,128,1,fp8,fp8,0,0.05329066514968872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,0,1,float16,fp8,0,0.05638400216897329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,128,1,float16,float16,0,0.057962665955225624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,0,1,fp8,fp8,0,0.054144000013669334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,0,1,float16,float16,0,0.056330665946006775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,128,1,float16,fp8,0,0.0582239975531896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,0,1,float16,fp8,0,0.058117335041364036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,128,1,fp8,fp8,0,0.05309866865475973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,0,1,fp8,fp8,0,0.053898667295773826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,128,1,float16,float16,0,0.05813866853713989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,0,1,float16,float16,0,0.056186666091283165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,128,1,float16,fp8,0,0.05818133552869161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,128,1,fp8,fp8,0,0.053898667295773826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,0,1,float16,fp8,0,0.056927998860677086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,0,1,fp8,fp8,0,0.05395199855168661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,128,1,float16,float16,0,0.033600000043710075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,0,1,float16,float16,0,0.0337119996547699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,128,1,float16,fp8,0,0.0347680002450943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,128,1,fp8,fp8,0,0.03395200024048487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,0,1,float16,fp8,0,0.035786665976047516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,0,1,fp8,fp8,0,0.03398400048414866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,128,1,float16,float16,0,0.03541333228349686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,128,1,float16,fp8,0,0.03575466573238373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,0,1,float16,float16,0,0.035487999518712364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,128,1,fp8,fp8,0,0.033813332517941795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,0,1,float16,fp8,0,0.03391999999682108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,0,1,fp8,fp8,0,0.03330666571855545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,128,1,float16,float16,0,0.035487999518712364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,0,1,float16,float16,0,0.033674667278925575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,128,1,float16,fp8,0,0.03391999999682108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,128,1,fp8,fp8,0,0.0331839993596077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,0,1,float16,fp8,0,0.03391999999682108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,0,1,fp8,fp8,0,0.0341386670867602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,128,1,float16,float16,0,0.033674667278925575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,0,1,float16,float16,0,0.03370666752258936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,128,1,float16,fp8,0,0.035631999373435974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,128,1,fp8,fp8,0,0.03340800106525421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,0,1,float16,fp8,0,0.03536533315976461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,128,1,float16,float16,0,0.025274666647116344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,0,1,fp8,fp8,0,0.03376533339420954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,0,1,float16,float16,0,0.02554133286078771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,128,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,128,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,0,1,float16,fp8,0,0.02537599951028824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,0,1,fp8,fp8,0,0.02532800038655599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,0,1,float16,float16,0,0.025461333493391674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,128,1,float16,float16,0,0.024469333390394848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,128,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,128,1,fp8,fp8,0,0.0249493345618248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,0,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,128,1,float16,float16,0,0.025381334125995636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,0,1,fp8,fp8,0,0.023056000471115112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,0,1,float16,float16,0,0.025050667424996693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,128,1,float16,fp8,0,0.025360000630219776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,128,1,fp8,fp8,0,0.023168000082174938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,0,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,0,1,fp8,fp8,0,0.023541333774725597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,128,1,float16,float16,0,0.02513599892457326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,0,1,float16,float16,0,0.025253333151340485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,128,1,float16,fp8,0,0.025279998779296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,128,1,fp8,fp8,0,0.02387733260790507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,128,1,float16,float16,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,0,1,float16,fp8,0,0.02532266577084859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,0,1,float16,float16,0,0.01966933285196622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,0,1,fp8,fp8,0,0.02513599892457326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,128,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,128,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,0,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,128,1,float16,float16,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,0,1,float16,float16,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,128,1,float16,fp8,0,0.019823999454577763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,128,1,fp8,fp8,0,0.01961600035429001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,0,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,0,1,fp8,fp8,0,0.018800000349680584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,128,1,float16,float16,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,0,1,float16,float16,0,0.01903466631968816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,128,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,128,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,128,1,float16,float16,0,0.019066666563351948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,0,1,float16,float16,0,0.019248000035683315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,128,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,0,1,float16,fp8,0,0.019920000185569126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,0,1,float16,float16,0,0.017349333812793095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,128,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,128,1,float16,float16,0,0.015781333049138386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,0,1,float16,float16,0,0.015477333217859268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,128,1,float16,float16,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,0,1,float16,float16,0,0.015530666957298914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,128,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,128,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,128,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,128,1,float16,float16,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,128,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,128,1,fp8,fp8,0,0.01545599972208341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,0,1,float16,fp8,0,0.017498667041460674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,128,1,float16,float16,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,128,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,128,1,float16,float16,0,0.015061333775520325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,128,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,128,1,float16,float16,0,0.015050667027632395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,0,1,float16,float16,0,0.018592000007629395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,128,1,fp8,fp8,0,0.015583999454975128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,0,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,0,1,float16,float16,0,0.01682666689157486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,128,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,128,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,128,1,float16,float16,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,128,1,float16,fp8,0,0.015685333559910457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,128,1,float16,float16,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,128,1,fp8,fp8,0,0.015370666980743408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,0,1,float16,fp8,0,0.016037333756685257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,128,1,float16,float16,0,0.014853333433469137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,128,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,128,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,0,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,128,1,float16,float16,0,0.016688000410795212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,0,1,float16,float16,0,0.016421332955360413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,128,1,float16,float16,0,0.016858667135238647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,128,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,0,1,float16,fp8,0,0.01613333324591319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,128,1,float16,float16,0,0.015360000232855478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,128,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,0,1,float16,float16,0,0.015802666544914246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,128,1,float16,float16,0,0.015834666788578033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,128,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,128,1,float16,float16,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,0,0.05021866659323374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,0,0.050069332122802734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,0,0.05226666728655497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,64,128,1,fp8,fp8,0,0.047839999198913574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,0,0.04985600213209788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,64,0,1,fp8,fp8,0,0.048138668139775596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,0,0.050026665131251015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,0,0.051872000098228455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,0,0.05041599770387014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,64,128,1,fp8,fp8,0,0.04773866633574168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,0,0.0518453319867452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,64,0,1,fp8,fp8,0,0.047968000173568726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,0,0.04991999765237173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,0,0.049866666396458946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,0,0.050479998191197716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,64,128,1,fp8,fp8,0,0.04790933430194855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,0,0.04982399940490723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,64,128,1,float16,float16,0,0.031231999397277832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,0,0.031717332700888314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,64,0,1,fp8,fp8,0,0.048250665267308555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,64,128,1,float16,fp8,0,0.03141333411137263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,64,128,1,fp8,fp8,0,0.029845332105954487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,0,0.033413333197434746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,64,0,1,fp8,fp8,0,0.03137599925200144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,0,0.03186666717131933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,0,0.03164800008138021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,0,0.03136533250411352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,64,128,1,fp8,fp8,0,0.031258667508761086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,0,0.031770666440327965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,64,0,1,fp8,fp8,0,0.031328000128269196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,0,0.03153600047032038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,0,0.03200000027815501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,0,0.03306133300065994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,64,128,1,fp8,fp8,0,0.03146133323510488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,0,0.033071999748547874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,64,0,1,fp8,fp8,0,0.029552000264326733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,0,0.033258666594823204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,0,0.03176533430814743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,0,0.03163733333349228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,64,128,1,fp8,fp8,0,0.029450667401154835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,0,0.03342933456103007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,64,128,1,float16,float16,0,0.023370665808518726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,64,0,1,fp8,fp8,0,0.03145066648721695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,64,128,1,float16,fp8,0,0.023408000667889912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,0,0.026149332523345947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,64,128,1,fp8,fp8,0,0.023226665953795116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,64,0,1,fp8,fp8,0,0.02319466571013133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,0,0.023930666347344715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,64,128,1,fp8,fp8,0,0.023520000278949738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,64,0,1,fp8,fp8,0,0.0235359991590182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,0,0.023344000180562336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,0,0.023797333240509033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,64,128,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,64,0,1,fp8,fp8,0,0.023647998770078022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,0,0.025311999022960663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,0,0.025370667378107708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,0,0.02513066679239273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,64,128,1,fp8,fp8,0,0.02380799998839696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,0,0.025306666890780132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,64,0,1,fp8,fp8,0,0.023103999594847362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,64,128,1,float16,float16,0,0.01941866676012675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,64,128,1,float16,fp8,0,0.02085866779088974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,64,128,1,fp8,fp8,0,0.018800000349680584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,0,0.02081599955757459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,0,0.01941866676012675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,0,0.019274666905403137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,64,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,0,0.019386666516462963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,64,0,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,0,0.019018666197856266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,0,0.01921066641807556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,0,0.01939733326435089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,64,128,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,0,0.02051199972629547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,64,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,0,0.019098666807015736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,0,0.019152000546455383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,0,0.0206133338312308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,64,128,1,fp8,fp8,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,64,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,64,128,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,0,0.016490666816631954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,64,0,1,fp8,fp8,0,0.015386667102575302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,0,0.015919999529918034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,64,128,1,fp8,fp8,0,0.01568000018596649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,64,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,0,0.01670933390657107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,64,0,1,fp8,fp8,0,0.015423999478419622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,64,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,64,128,1,float16,float16,0,0.014922666052977243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,0,0.016202667107184727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,64,0,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,64,128,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,0,0.017525333911180496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,0,0.016794666647911072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,64,128,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,64,128,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,64,0,1,fp8,fp8,0,0.016714667280515034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,64,128,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,0,0.016735999534527462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,0,0.015397333850463232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,64,128,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,0,0.014789332946141561
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,64,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,0,0.015482666591803232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,64,128,1,float16,fp8,0,0.015637333194414776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,64,128,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,64,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,0,0.016234666109085083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,64,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,0,0.01617066686352094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,0,0.01590399940808614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,0,0.016063999384641647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,64,0,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,64,128,1,float16,float16,0,0.01515199989080429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,64,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,0,0.014864000181357065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,64,0,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,64,128,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,64,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,64,0,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,128,1,float16,float16,0,0.2392959992090861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,128,1,float16,fp8,0,0.24279467264811197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,128,1,fp8,fp8,0,0.22950933376948038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,0,1,float16,float16,0,1.4360159238179524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,128,1,float16,float16,0,0.2512800097465515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,0,1,float16,fp8,0,1.4355039596557617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,0,1,fp8,fp8,0,1.337557315826416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,128,1,float16,fp8,0,0.2547360062599182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,128,1,fp8,fp8,0,0.2425439953804016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,0,1,float16,float16,0,1.4461119969685872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,128,1,float16,float16,0,0.14484799901644388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,0,1,float16,fp8,0,1.4489760398864746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,0,1,fp8,fp8,0,1.3515839576721191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,128,1,float16,fp8,0,0.14856533209482828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,128,1,fp8,fp8,0,0.14444266756375632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,0,1,float16,float16,0,0.8006292978922526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,128,1,float16,float16,0,0.12804800271987915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,0,1,fp8,fp8,0,0.7487466335296631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,0,1,float16,fp8,0,0.802560011545817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,128,1,float16,fp8,0,0.12991467118263245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,128,1,fp8,fp8,0,0.12583999832471213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,0,1,float16,float16,0,0.7855146725972494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,128,1,float16,float16,0,0.1343839963277181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,0,1,float16,fp8,0,0.7824479738871256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,0,1,fp8,fp8,0,0.7294399738311768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,128,1,float16,fp8,0,0.1378506620724996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,128,1,fp8,fp8,0,0.13194132844607034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,0,1,float16,float16,0,0.7900959650675455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,128,1,float16,float16,0,0.08522666494051616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,0,1,float16,fp8,0,0.7874346574147543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,128,1,float16,fp8,0,0.0888266662756602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,0,1,float16,float16,0,0.4679199854532878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,0,1,fp8,fp8,0,0.7377866903940836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,128,1,fp8,fp8,0,0.08878933389981587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,128,1,float16,float16,0,0.08010133107503255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,0,1,float16,fp8,0,0.4696906805038452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,0,1,fp8,fp8,0,0.4385066827138265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,128,1,float16,fp8,0,0.08052800099054973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,0,1,float16,float16,0,0.4635466734568278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,128,1,fp8,fp8,0,0.07692266503969829
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,128,1,float16,float16,0,0.08266666531562805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,0,1,float16,fp8,0,0.46460266908009845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,0,1,fp8,fp8,0,0.4261653423309326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,128,1,float16,fp8,0,0.08321600159009297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,0,1,float16,float16,0,0.4614186684290568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,128,1,fp8,fp8,0,0.07876266539096832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,0,1,float16,fp8,0,0.46535468101501465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,128,1,float16,float16,0,0.06417066852251689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,0,1,float16,float16,0,0.31153066953023273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,0,1,fp8,fp8,0,0.42900800704956055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,128,1,float16,fp8,0,0.06443200012048085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,128,1,fp8,fp8,0,0.06225599845250448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,0,1,float16,fp8,0,0.31066133578618366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,128,1,float16,float16,0,0.06438933312892914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,0,1,fp8,fp8,0,0.2877066731452942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,0,1,float16,float16,0,0.3129653334617615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,128,1,float16,fp8,0,0.06445866823196411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,128,1,fp8,fp8,0,0.062362665931383766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,0,1,float16,fp8,0,0.3111093242963155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,0,1,fp8,fp8,0,0.28989332914352417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,128,1,float16,float16,0,0.06449066599210103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,128,1,float16,fp8,0,0.06460266808668773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,128,1,fp8,fp8,0,0.062165334820747375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,0,1,float16,float16,0,0.3104586601257324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,128,1,float16,float16,0,0.1840320030848185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,0,1,fp8,fp8,0,0.2882240017255147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,0,1,float16,fp8,0,0.31178667147954303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,128,1,float16,fp8,0,0.18521066506703696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,128,1,fp8,fp8,0,0.17685866355895996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,0,1,float16,float16,0,0.8698933124542236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,0,1,fp8,fp8,0,0.8146719932556152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,128,1,float16,float16,0,0.19148266315460205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,0,1,float16,fp8,0,0.8722879886627197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,128,1,float16,fp8,0,0.19387733936309814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,128,1,fp8,fp8,0,0.1860533356666565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,0,1,float16,float16,0,0.8817546367645264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,128,1,float16,float16,0,0.1146506667137146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,0,1,fp8,fp8,0,0.8225173155466715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,0,1,float16,fp8,0,0.881498654683431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,128,1,float16,fp8,0,0.11742933591206868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,128,1,fp8,fp8,0,0.11533332864443462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,0,1,float16,float16,0,0.5009066661198934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,128,1,float16,float16,0,0.10136000315348308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,0,1,float16,fp8,0,0.50054931640625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,0,1,fp8,fp8,0,0.46902398268381756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,128,1,float16,fp8,0,0.1032480001449585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,128,1,fp8,fp8,0,0.09809066851933797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,0,1,float16,float16,0,0.4894346793492635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,128,1,float16,float16,0,0.10513066252072652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,0,1,float16,fp8,0,0.48687465985616046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,0,1,fp8,fp8,0,0.4506880044937134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,128,1,float16,fp8,0,0.10770133137702942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,128,1,fp8,fp8,0,0.10533866286277771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,0,1,float16,float16,0,0.4936319986979167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,0,1,float16,fp8,0,0.4901386499404907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,128,1,float16,float16,0,0.06700266897678375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,0,1,fp8,fp8,0,0.45791467030843097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,128,1,float16,fp8,0,0.06820266445477803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,0,1,float16,float16,0,0.30209600925445557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,128,1,fp8,fp8,0,0.06858133276303609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,128,1,float16,float16,0,0.06449066599210103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,0,1,float16,fp8,0,0.3023359974225362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,0,1,fp8,fp8,0,0.2797013322512309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,128,1,float16,fp8,0,0.06447466711203258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,0,1,float16,float16,0,0.30053865909576416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,128,1,fp8,fp8,0,0.06192533175150553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,0,1,float16,fp8,0,0.29812800884246826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,128,1,float16,float16,0,0.0644053320089976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,0,1,fp8,fp8,0,0.27561066548029584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,0,1,float16,float16,0,0.29747732480367023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,128,1,float16,fp8,0,0.06637333333492279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,128,1,fp8,fp8,0,0.062421331803003945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,128,1,float16,float16,0,0.05583466589450836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,0,1,float16,fp8,0,0.30021866162618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,0,1,fp8,fp8,0,0.2760106722513835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,0,1,float16,float16,0,0.20566399892171225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,128,1,float16,fp8,0,0.05597866574923197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,128,1,fp8,fp8,0,0.053786665201187134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,0,1,float16,fp8,0,0.20494933923085532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,0,1,fp8,fp8,0,0.1897439956665039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,128,1,float16,float16,0,0.054287999868392944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,0,1,float16,float16,0,0.2055413325627645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,128,1,float16,fp8,0,0.05592533449331919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,128,1,fp8,fp8,0,0.051925331354141235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,0,1,float16,fp8,0,0.20259199539820352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,128,1,float16,float16,0,0.05397333204746246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,0,1,fp8,fp8,0,0.18943999210993448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,0,1,float16,float16,0,0.20404267311096191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,128,1,float16,fp8,0,0.0558240016301473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,128,1,fp8,fp8,0,0.0517546683549881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,0,1,float16,fp8,0,0.2041920026143392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,0,1,fp8,fp8,0,0.18979199727376303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,128,1,float16,float16,0,0.15408000349998474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,128,1,float16,fp8,0,0.15455466508865356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,128,1,fp8,fp8,0,0.1481706698735555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,0,1,float16,float16,0,0.6396106481552124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,128,1,float16,float16,0,0.1606880029042562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,0,1,float16,fp8,0,0.6415359973907471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,0,1,fp8,fp8,0,0.5972853501637777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,128,1,float16,fp8,0,0.16299200057983398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,128,1,fp8,fp8,0,0.1562933325767517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,0,1,float16,float16,0,0.6458986600240072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,128,1,float16,float16,0,0.09572266538937886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,0,1,float16,fp8,0,0.6497013171513876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,0,1,float16,float16,0,0.3745386600494385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,0,1,fp8,fp8,0,0.6051520109176636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,128,1,float16,fp8,0,0.09898133079210918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,128,1,fp8,fp8,0,0.09903466701507568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,128,1,float16,float16,0,0.08689066767692566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,0,1,float16,fp8,0,0.37595200538635254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,0,1,fp8,fp8,0,0.35307733217875165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,128,1,float16,fp8,0,0.08889599641164143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,128,1,fp8,fp8,0,0.0827946662902832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,0,1,float16,float16,0,0.3682560125986735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,128,1,float16,float16,0,0.08907199899355571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,0,1,float16,fp8,0,0.36694931983947754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,0,1,fp8,fp8,0,0.33885331948598224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,0,1,float16,float16,0,0.3691893418629964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,128,1,float16,fp8,0,0.09307733178138733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,128,1,fp8,fp8,0,0.08773333827654521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,0,1,float16,fp8,0,0.3734133243560791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,128,1,float16,float16,0,0.06043733159701029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,0,1,fp8,fp8,0,0.3415786822636922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,128,1,float16,fp8,0,0.0634933312733968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,0,1,float16,float16,0,0.2339093287785848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,128,1,fp8,fp8,0,0.06030400097370148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,0,1,float16,fp8,0,0.23505600293477377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,128,1,float16,float16,0,0.05886933207511902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,0,1,fp8,fp8,0,0.22010133663813272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,128,1,float16,fp8,0,0.05795733133951823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,128,1,fp8,fp8,0,0.056186666091283165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,0,1,float16,float16,0,0.23251734177271524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,128,1,float16,float16,0,0.05832533538341522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,0,1,float16,fp8,0,0.23244800170262656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,0,1,fp8,fp8,0,0.21549866596857706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,128,1,float16,fp8,0,0.060405333836873375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,128,1,fp8,fp8,0,0.056661332647005715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,0,1,float16,float16,0,0.23241066932678223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,128,1,float16,float16,0,0.04995200037956238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,0,1,float16,fp8,0,0.23280000686645508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,0,1,fp8,fp8,0,0.2159199913342794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,0,1,float16,float16,0,0.15404799580574036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,128,1,float16,fp8,0,0.05006400247414907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,128,1,fp8,fp8,0,0.048351998130480446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,128,1,float16,float16,0,0.0503359983364741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,0,1,float16,fp8,0,0.15260799725850424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,0,1,fp8,fp8,0,0.14328533411026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,128,1,float16,fp8,0,0.050069332122802734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,128,1,fp8,fp8,0,0.04807466765244802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,0,1,float16,float16,0,0.15252799789110819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,0,1,float16,fp8,0,0.15242133537928262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,128,1,float16,float16,0,0.05002133548259735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,0,1,fp8,fp8,0,0.14320000012715658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,128,1,float16,fp8,0,0.04997866849104563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,128,1,fp8,fp8,0,0.04791999856630961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,0,1,float16,float16,0,0.15153599778811136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,0,1,fp8,fp8,0,0.14230400323867798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,0,1,float16,fp8,0,0.154639999071757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,128,1,float16,float16,0,0.2341653307278951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,128,1,float16,fp8,0,0.23865600426991782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,128,1,fp8,fp8,0,0.22446399927139282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,0,1,float16,float16,0,0.8185226917266846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,0,1,float16,fp8,0,0.8243093490600586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,128,1,float16,float16,0,0.24685867627461752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,0,1,fp8,fp8,0,0.7633546988169352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,128,1,float16,fp8,0,0.24952532847722372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,128,1,fp8,fp8,0,0.23865065972010294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,0,1,float16,float16,0,0.8298239707946777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,128,1,float16,float16,0,0.1397813359896342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,0,1,float16,fp8,0,0.8355200290679932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,128,1,float16,fp8,0,0.14206399520238241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,0,1,fp8,fp8,0,0.77564803759257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,0,1,float16,float16,0,0.46245332558949787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,128,1,fp8,fp8,0,0.1376213332017263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,128,1,float16,float16,0,0.12129066387812297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,0,1,float16,fp8,0,0.4623200098673503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,0,1,fp8,fp8,0,0.43308266003926593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,128,1,float16,fp8,0,0.12405332922935486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,128,1,fp8,fp8,0,0.11942399541536967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,0,1,float16,float16,0,0.44658132394154865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,0,1,float16,fp8,0,0.44491732120513916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,128,1,float16,float16,0,0.12822932998339334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,0,1,fp8,fp8,0,0.41705600420633954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,128,1,float16,fp8,0,0.12983466188112894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,0,1,float16,float16,0,0.4514240026473999
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,128,1,fp8,fp8,0,0.12706133723258972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,128,1,float16,float16,0,0.07905599971612294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,0,1,float16,float16,0,0.26770132780075073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,0,1,float16,fp8,0,0.4547893206278483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,0,1,fp8,fp8,0,0.42242666085561115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,128,1,float16,fp8,0,0.08150400221347809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,128,1,fp8,fp8,0,0.08069866895675659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,128,1,float16,float16,0,0.07250133156776428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,0,1,float16,fp8,0,0.26978133122126263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,0,1,fp8,fp8,0,0.25221866369247437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,0,1,float16,float16,0,0.26156800985336304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,128,1,float16,fp8,0,0.07275733351707458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,128,1,fp8,fp8,0,0.06923733154932658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,0,1,float16,fp8,0,0.26320000489552814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,128,1,float16,float16,0,0.07492800056934357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,0,1,fp8,fp8,0,0.241866668065389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,0,1,float16,float16,0,0.2644159992535909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,128,1,float16,fp8,0,0.07452799876530965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,128,1,fp8,fp8,0,0.0746506651242574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,128,1,float16,float16,0,0.04794133206208547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,0,1,float16,fp8,0,0.26425600051879883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,0,1,fp8,fp8,0,0.2439733346303304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,0,1,float16,float16,0,0.17353065808614096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,128,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,128,1,fp8,fp8,0,0.04971200227737427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,0,1,float16,fp8,0,0.17462400595347086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,128,1,float16,float16,0,0.04593066871166229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,0,1,fp8,fp8,0,0.16289599736531576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,0,1,float16,float16,0,0.17108267545700073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,128,1,float16,fp8,0,0.04780800143877665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,128,1,fp8,fp8,0,0.045738667249679565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,0,1,float16,fp8,0,0.17118932803471884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,128,1,float16,float16,0,0.04791999856630961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,0,1,fp8,fp8,0,0.15689599514007568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,0,1,float16,float16,0,0.17094399531682333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,128,1,float16,fp8,0,0.04807466765244802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,128,1,fp8,fp8,0,0.04596266647179922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,0,1,float16,fp8,0,0.17058134078979492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,0,1,fp8,fp8,0,0.15898133317629495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,128,1,float16,float16,0,0.037717332442601524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,128,1,float16,fp8,0,0.03839466720819473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,0,1,float16,float16,0,0.1178559958934784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,128,1,fp8,fp8,0,0.0376800000667572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,0,1,float16,fp8,0,0.12012267112731934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,0,1,fp8,fp8,0,0.11339733004570007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,128,1,float16,float16,0,0.03956266740957896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,128,1,float16,fp8,0,0.039701332648595176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,0,1,float16,float16,0,0.1200320025285085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,128,1,fp8,fp8,0,0.03756266583998998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,0,1,float16,fp8,0,0.11957333485285442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,128,1,float16,float16,0,0.03937066594759623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,0,1,fp8,fp8,0,0.11336533228556316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,0,1,float16,float16,0,0.11960533261299133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,128,1,float16,fp8,0,0.03946666667858759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,128,1,fp8,fp8,0,0.03782933453718821
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,0,1,fp8,fp8,0,0.11172800262769063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,0,1,float16,fp8,0,0.11766933401425679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,128,1,float16,float16,0,0.18191999197006226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,128,1,float16,fp8,0,0.18318933248519897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,128,1,fp8,fp8,0,0.17433599630991617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,0,1,float16,float16,0,0.5124479929606119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,0,1,float16,fp8,0,0.5138826767603556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,128,1,float16,float16,0,0.1923733353614807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,0,1,fp8,fp8,0,0.48023466269175213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,128,1,float16,fp8,0,0.1936639944712321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,128,1,fp8,fp8,0,0.18626666069030762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,0,1,float16,float16,0,0.5215253432591757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,128,1,float16,float16,0,0.11101866761843364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,0,1,float16,fp8,0,0.5237013498942057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,0,1,fp8,fp8,0,0.48880000909169513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,128,1,float16,fp8,0,0.1123253305753072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,128,1,fp8,fp8,0,0.10975466171900432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,0,1,float16,float16,0,0.2974453369776408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,128,1,float16,float16,0,0.09753066301345825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,0,1,float16,fp8,0,0.2981333335240682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,0,1,fp8,fp8,0,0.2802346746126811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,128,1,float16,fp8,0,0.09911466638247173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,128,1,fp8,fp8,0,0.09310400485992432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,0,1,float16,float16,0,0.284170667330424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,128,1,float16,float16,0,0.0995840032895406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,0,1,fp8,fp8,0,0.2632906635602315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,0,1,float16,fp8,0,0.28591465950012207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,128,1,float16,fp8,0,0.1051573355992635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,128,1,fp8,fp8,0,0.10115733742713928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,0,1,float16,float16,0,0.2861386736234029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,128,1,float16,float16,0,0.062021334966023765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,0,1,float16,float16,0,0.17512534062067667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,0,1,float16,fp8,0,0.2897119919459025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,0,1,fp8,fp8,0,0.27025065819422406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,128,1,float16,fp8,0,0.06467733283837636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,128,1,fp8,fp8,0,0.06203199923038483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,0,1,fp8,fp8,0,0.16568533579508463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,0,1,float16,fp8,0,0.17855999867121378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,128,1,float16,float16,0,0.058149332801500954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,0,1,float16,float16,0,0.1710666616757711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,128,1,float16,fp8,0,0.058320000767707825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,128,1,fp8,fp8,0,0.05696000158786774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,0,1,float16,fp8,0,0.1718506614367167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,128,1,float16,float16,0,0.05825600028038025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,0,1,fp8,fp8,0,0.1587999959786733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,128,1,float16,fp8,0,0.060122668743133545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,0,1,float16,float16,0,0.17339199781417847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,128,1,fp8,fp8,0,0.05794133245944977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,0,1,float16,fp8,0,0.1732106606165568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,0,1,fp8,fp8,0,0.1609226663907369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,128,1,float16,float16,0,0.04394666850566864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,0,1,float16,float16,0,0.11557867129643758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,128,1,float16,fp8,0,0.04397333165009817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,128,1,fp8,fp8,0,0.04359466830889384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,0,1,float16,fp8,0,0.11612266302108765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,0,1,fp8,fp8,0,0.10924800237019856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,128,1,float16,float16,0,0.04191466669241587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,0,1,float16,float16,0,0.11451733112335205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,128,1,float16,fp8,0,0.04201066493988037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,128,1,fp8,fp8,0,0.03996799886226654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,0,1,float16,fp8,0,0.11460799972216289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,0,1,fp8,fp8,0,0.1056160032749176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,128,1,float16,float16,0,0.04191466669241587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,0,1,float16,float16,0,0.11356799801190694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,128,1,float16,fp8,0,0.0436160018046697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,128,1,fp8,fp8,0,0.039813332259655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,0,1,float16,fp8,0,0.11406399806340535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,128,1,float16,float16,0,0.03365333378314972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,0,1,fp8,fp8,0,0.10610666871070862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,0,1,float16,float16,0,0.09333333373069763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,128,1,float16,fp8,0,0.033376000821590424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,128,1,fp8,fp8,0,0.03348266581694285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,0,1,float16,fp8,0,0.0935093363126119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,128,1,float16,float16,0,0.03473066786924998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,0,1,fp8,fp8,0,0.08888000249862671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,0,1,float16,float16,0,0.09283733367919922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,128,1,float16,fp8,0,0.03508266558249792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,128,1,fp8,fp8,0,0.033386667569478355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,0,1,float16,fp8,0,0.0932373305161794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,128,1,float16,float16,0,0.034714666505654655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,0,1,fp8,fp8,0,0.08678399523099263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,0,1,float16,float16,0,0.09322133660316467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,128,1,float16,fp8,0,0.035616000493367515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,128,1,fp8,fp8,0,0.03336533407370249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,0,1,float16,fp8,0,0.09317866961161296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,0,1,fp8,fp8,0,0.08716266353925069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,128,1,float16,float16,0,0.2403786579767863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,128,1,float16,fp8,0,0.24282666047414145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,0,1,float16,float16,0,0.5142666498819987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,128,1,fp8,fp8,0,0.22819733619689941
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,128,1,float16,float16,0,0.25432000557581586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,0,1,float16,fp8,0,0.5185920000076294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,0,1,fp8,fp8,0,0.47861866156260174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,128,1,float16,fp8,0,0.2548266649246216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,128,1,fp8,fp8,0,0.24172266324361166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,0,1,float16,float16,0,0.5288159847259521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,128,1,float16,float16,0,0.142085333665212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,0,1,float16,fp8,0,0.531493345896403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,0,1,fp8,fp8,0,0.49116265773773193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,0,1,float16,float16,0,0.29362666606903076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,128,1,float16,fp8,0,0.1441973348458608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,128,1,fp8,fp8,0,0.13809067010879517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,0,1,float16,fp8,0,0.2951359947522481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,128,1,float16,float16,0,0.12018133203188579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,0,1,fp8,fp8,0,0.27726932366689044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,128,1,float16,fp8,0,0.12290666500727336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,128,1,fp8,fp8,0,0.11981866757074992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,0,1,float16,float16,0,0.27448532978693646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,128,1,float16,float16,0,0.1269599994023641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,0,1,fp8,fp8,0,0.258240004380544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,0,1,float16,fp8,0,0.27542932828267414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,128,1,fp8,fp8,0,0.12787733475367227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,128,1,float16,fp8,0,0.13026133179664612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,0,1,float16,float16,0,0.2775786717732747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,128,1,float16,float16,0,0.07520000139872234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,0,1,fp8,fp8,0,0.2652906576792399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,0,1,float16,fp8,0,0.2813439965248108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,0,1,float16,float16,0,0.16289066274960837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,128,1,float16,fp8,0,0.07888533174991608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,128,1,fp8,fp8,0,0.07849599917729695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,0,1,float16,fp8,0,0.16548800468444824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,128,1,float16,float16,0,0.07055999835332234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,0,1,fp8,fp8,0,0.15685333808263144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,128,1,float16,fp8,0,0.07191466788450877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,128,1,fp8,fp8,0,0.06832533578077953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,0,1,float16,float16,0,0.16035200158754984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,0,1,float16,fp8,0,0.1607146660486857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,128,1,float16,float16,0,0.07220800220966339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,0,1,fp8,fp8,0,0.1467359960079193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,0,1,float16,float16,0,0.16087999939918518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,128,1,float16,fp8,0,0.07269333302974701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,128,1,fp8,fp8,0,0.07007466753323872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,128,1,float16,float16,0,0.045850664377212524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,0,1,float16,fp8,0,0.1611733337243398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,0,1,fp8,fp8,0,0.14890133341153464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,0,1,float16,float16,0,0.103493332862854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,128,1,float16,fp8,0,0.04781866570313772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,128,1,fp8,fp8,0,0.045509333411852516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,0,1,float16,fp8,0,0.10534399747848511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,128,1,float16,float16,0,0.04394666850566864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,0,1,fp8,fp8,0,0.09912533561388652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,0,1,float16,float16,0,0.10124799609184265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,128,1,float16,fp8,0,0.043893332282702126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,128,1,fp8,fp8,0,0.041877334316571556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,0,1,float16,fp8,0,0.10231999556223552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,0,1,fp8,fp8,0,0.09501333038012187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,128,1,float16,float16,0,0.044922664761543274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,0,1,float16,float16,0,0.10347200433413188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,128,1,float16,fp8,0,0.04570133487383524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,128,1,fp8,fp8,0,0.04351999859015147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,0,1,float16,fp8,0,0.10421866178512573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,128,1,float16,float16,0,0.03148266673088074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,0,1,fp8,fp8,0,0.09738133351008098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,0,1,float16,float16,0,0.0684853345155716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,128,1,float16,fp8,0,0.031258667508761086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,128,1,fp8,fp8,0,0.0315733328461647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,0,1,fp8,fp8,0,0.06620266536871593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,0,1,float16,fp8,0,0.07094933092594147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,128,1,float16,float16,0,0.029290666182835896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,0,1,float16,float16,0,0.06853333115577698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,128,1,float16,fp8,0,0.029504001140594482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,128,1,fp8,fp8,0,0.02958933264017105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,0,1,float16,fp8,0,0.06861333549022675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,128,1,float16,float16,0,0.030943999687830608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,0,1,fp8,fp8,0,0.06632533172766368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,0,1,float16,float16,0,0.0705813318490982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,128,1,float16,fp8,0,0.03134933362404505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,128,1,fp8,fp8,0,0.03142400085926056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,0,1,float16,fp8,0,0.06856533388296764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,0,1,fp8,fp8,0,0.0646666685740153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,128,1,float16,float16,0,0.029509333272775013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,0,1,float16,float16,0,0.0683840016523997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,128,1,float16,fp8,0,0.02924799919128418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,128,1,fp8,fp8,0,0.027888000011444092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,0,1,float16,fp8,0,0.06831466654936473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,0,1,fp8,fp8,0,0.06421866516272227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,128,1,float16,float16,0,0.029616000751654308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,0,1,float16,float16,0,0.06844800213972728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,128,1,float16,fp8,0,0.02922666569550832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,128,1,fp8,fp8,0,0.02924266705910365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,0,1,float16,fp8,0,0.06830400228500366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,0,1,fp8,fp8,0,0.06259733438491821
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,128,1,float16,float16,0,0.029365333418051403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,0,1,float16,float16,0,0.06668800115585327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,128,1,float16,fp8,0,0.02934933453798294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,128,1,fp8,fp8,0,0.02758399893840154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,0,1,float16,fp8,0,0.06673599779605865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,0,1,fp8,fp8,0,0.06238399942715963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,128,1,float16,float16,0,0.1841599941253662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,128,1,float16,fp8,0,0.1855093240737915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,0,1,float16,float16,0,0.33530668417612713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,128,1,fp8,fp8,0,0.17543999354044595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,0,1,float16,fp8,0,0.33953599135080975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,0,1,fp8,fp8,0,0.31199999650319415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,128,1,float16,float16,0,0.19491199652353922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,128,1,float16,fp8,0,0.19418134291966757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,0,1,float16,float16,0,0.34457600116729736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,128,1,fp8,fp8,0,0.187226672967275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,128,1,float16,float16,0,0.10957333445549011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,0,1,fp8,fp8,0,0.3229440053304036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,0,1,float16,fp8,0,0.34513600667317706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,0,1,float16,float16,0,0.1941866676012675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,128,1,float16,fp8,0,0.11168000102043152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,128,1,fp8,fp8,0,0.11011733611424764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,0,1,float16,fp8,0,0.19609065850575766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,128,1,float16,float16,0,0.09686400492986043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,0,1,fp8,fp8,0,0.18752533197402954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,0,1,float16,float16,0,0.18020800749460855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,128,1,float16,fp8,0,0.0974133312702179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,128,1,fp8,fp8,0,0.09281599521636963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,128,1,float16,float16,0,0.09963200489679973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,0,1,float16,fp8,0,0.1825760006904602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,0,1,fp8,fp8,0,0.16899732748667398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,128,1,float16,fp8,0,0.10245333115259807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,128,1,fp8,fp8,0,0.09886933366457622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,0,1,float16,float16,0,0.185263991355896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,128,1,float16,float16,0,0.0599839985370636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,0,1,float16,fp8,0,0.18549333016077676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,0,1,fp8,fp8,0,0.1755733291308085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,0,1,float16,float16,0,0.11079999804496765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,128,1,float16,fp8,0,0.062208001812299095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,128,1,fp8,fp8,0,0.062234664956728615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,0,1,float16,fp8,0,0.11282666524251302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,128,1,float16,float16,0,0.0563679983218511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,0,1,fp8,fp8,0,0.10682132840156555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,0,1,float16,float16,0,0.1071626643339793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,128,1,float16,fp8,0,0.056261335810025535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,128,1,fp8,fp8,0,0.05385066568851471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,0,1,float16,fp8,0,0.10874666770299275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,0,1,fp8,fp8,0,0.10028800368309021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,128,1,float16,float16,0,0.05826666454474131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,0,1,float16,float16,0,0.10945066809654236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,128,1,float16,fp8,0,0.059936001896858215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,128,1,fp8,fp8,0,0.05629866818586985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,0,1,float16,fp8,0,0.11030399799346924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,128,1,float16,float16,0,0.04153066625197729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,0,1,fp8,fp8,0,0.10322667161623637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,0,1,float16,float16,0,0.07281066477298737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,128,1,float16,fp8,0,0.04162666698296865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,128,1,fp8,fp8,0,0.04137066751718521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,0,1,float16,fp8,0,0.07499200105667114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,0,1,fp8,fp8,0,0.0684853345155716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,128,1,float16,float16,0,0.03976000100374222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,0,1,float16,float16,0,0.07054399947325389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,128,1,float16,fp8,0,0.03978666663169861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,128,1,fp8,fp8,0,0.037471999724706016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,0,1,float16,fp8,0,0.07028799752394359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,0,1,fp8,fp8,0,0.06612800061702728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,128,1,float16,float16,0,0.039701332648595176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,128,1,float16,fp8,0,0.0414986660083135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,0,1,float16,float16,0,0.0724426656961441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,128,1,fp8,fp8,0,0.03789866715669632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,128,1,float16,float16,0,0.02926933268706004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,0,1,fp8,fp8,0,0.06817600131034851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,0,1,float16,float16,0,0.05605333546797434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,0,1,float16,fp8,0,0.07276266813278198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,128,1,float16,fp8,0,0.02941333254178365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,128,1,fp8,fp8,0,0.027456000447273254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,0,1,float16,fp8,0,0.05615466833114624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,0,1,fp8,fp8,0,0.05417599777380625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,128,1,float16,float16,0,0.027477333943049114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,0,1,float16,float16,0,0.05421866476535797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,128,1,float16,fp8,0,0.02900800108909607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,128,1,fp8,fp8,0,0.027258666853109997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,0,1,float16,fp8,0,0.05613866448402405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,0,1,fp8,fp8,0,0.05177066723505656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,128,1,float16,float16,0,0.0296426663796107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,0,1,float16,float16,0,0.05625600119431814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,128,1,float16,fp8,0,0.02735999971628189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,128,1,fp8,fp8,0,0.027445333699385326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,0,1,float16,fp8,0,0.05606399973233541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,0,1,fp8,fp8,0,0.054058666030565895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,128,1,float16,float16,0,0.02756800005833308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,0,1,float16,float16,0,0.05410666763782501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,128,1,float16,fp8,0,0.02752000093460083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,128,1,fp8,fp8,0,0.027450665831565857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,0,1,float16,fp8,0,0.05440000196297964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,128,1,float16,float16,0,0.02743999908367793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,0,1,fp8,fp8,0,0.05193066596984863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,0,1,float16,float16,0,0.05392533540725708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,128,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,128,1,fp8,fp8,0,0.025274666647116344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,0,1,float16,fp8,0,0.054197331269582115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,0,1,fp8,fp8,0,0.052144000927607216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,128,1,float16,float16,0,0.025631998976071674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,0,1,float16,float16,0,0.054154664278030396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,128,1,float16,fp8,0,0.02738133321205775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,128,1,fp8,fp8,0,0.026672000686327618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,0,1,float16,fp8,0,0.054330666859944664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,0,1,fp8,fp8,0,0.050928001602490745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,128,1,float16,float16,0,0.24435200293858847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,128,1,float16,fp8,0,0.24393065770467123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,0,1,float16,float16,0,0.3650826613108317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,128,1,fp8,fp8,0,0.23084799448649088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,0,1,float16,fp8,0,0.35950934886932373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,0,1,fp8,fp8,0,0.3362133502960205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,128,1,float16,float16,0,0.25622399648030597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,0,1,float16,float16,0,0.37281068166097003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,128,1,float16,fp8,0,0.2598293423652649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,128,1,fp8,fp8,0,0.24087466796239218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,128,1,float16,float16,0,0.14110933740933737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,0,1,float16,fp8,0,0.3757173220316569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,0,1,fp8,fp8,0,0.3476693232854207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,0,1,float16,float16,0,0.2076853315035502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,128,1,float16,fp8,0,0.1423733333746592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,128,1,fp8,fp8,0,0.14165332913398743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,0,1,float16,fp8,0,0.21039466063181558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,0,1,fp8,fp8,0,0.19885333379109701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,128,1,float16,float16,0,0.1225920021533966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,0,1,float16,float16,0,0.18877865870793661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,128,1,float16,fp8,0,0.1262933313846588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,128,1,fp8,fp8,0,0.12197867035865784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,0,1,float16,fp8,0,0.18842132886250815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,0,1,fp8,fp8,0,0.1814346710840861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,128,1,float16,float16,0,0.13200533390045166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,128,1,float16,fp8,0,0.13276267051696777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,128,1,fp8,fp8,0,0.12804800271987915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,0,1,float16,float16,0,0.1933013399442037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,128,1,float16,float16,0,0.07573333382606506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,0,1,float16,fp8,0,0.19644800821940103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,0,1,fp8,fp8,0,0.18595733245213827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,0,1,float16,float16,0,0.11336533228556316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,128,1,float16,fp8,0,0.0788266658782959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,128,1,fp8,fp8,0,0.07879466811815898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,0,1,float16,fp8,0,0.11569066842397054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,128,1,float16,float16,0,0.07030933101971944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,0,1,fp8,fp8,0,0.11107200384140015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,0,1,float16,float16,0,0.10610666871070862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,128,1,float16,fp8,0,0.07249066730340321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,128,1,fp8,fp8,0,0.06653333206971486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,0,1,float16,fp8,0,0.10830400387446086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,128,1,float16,float16,0,0.07203199962774913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,0,1,fp8,fp8,0,0.10141332944234212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,0,1,float16,float16,0,0.10689066847165425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,128,1,float16,fp8,0,0.07632000247637431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,128,1,fp8,fp8,0,0.07080533107121785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,128,1,float16,float16,0,0.04571199913819631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,0,1,fp8,fp8,0,0.10320533315340678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,0,1,float16,float16,0,0.07173333565394084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,0,1,float16,fp8,0,0.11034666498502095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,128,1,float16,fp8,0,0.04788800080617269
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,128,1,fp8,fp8,0,0.04404800136884054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,0,1,float16,fp8,0,0.07235200206438701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,128,1,float16,float16,0,0.04377600053946177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,0,1,fp8,fp8,0,0.06966400146484375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,0,1,float16,float16,0,0.06863999863465627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,128,1,float16,fp8,0,0.04391466577847799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,128,1,fp8,fp8,0,0.0405973345041275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,128,1,float16,float16,0,0.04577066500981649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,0,1,fp8,fp8,0,0.06448533137639363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,0,1,float16,fp8,0,0.0691840002934138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,0,1,float16,float16,0,0.06844266752401988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,128,1,float16,fp8,0,0.04563199977080027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,128,1,fp8,fp8,0,0.04366933306058248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,0,1,float16,fp8,0,0.07010133564472198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,0,1,fp8,fp8,0,0.06646400193373363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,128,1,float16,float16,0,0.029279999434947968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,0,1,float16,float16,0,0.045978665351867676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,128,1,float16,fp8,0,0.030693332354227703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,128,1,fp8,fp8,0,0.03029866764942805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,0,1,float16,fp8,0,0.04718933502833048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,0,1,fp8,fp8,0,0.043925335009892784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,128,1,float16,float16,0,0.029088000456492107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,0,1,float16,float16,0,0.04560000201066335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,128,1,float16,fp8,0,0.02962133288383484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,128,1,fp8,fp8,0,0.029167999823888142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,0,1,float16,fp8,0,0.04632000128428141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,0,1,fp8,fp8,0,0.043552001317342125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,128,1,float16,float16,0,0.02961066613594691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,0,1,float16,float16,0,0.047877331574757896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,128,1,float16,fp8,0,0.030949334303538006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,128,1,fp8,fp8,0,0.029365333418051403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,0,1,float16,fp8,0,0.048112000028292336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,0,1,fp8,fp8,0,0.04502399762471517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,128,1,float16,float16,0,0.027109332382678986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,0,1,float16,float16,0,0.04193066557248434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,128,1,float16,fp8,0,0.02740799884001414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,128,1,fp8,fp8,0,0.02518400053183238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,0,1,float16,fp8,0,0.04385599990685781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,0,1,fp8,fp8,0,0.04178666571776072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,128,1,float16,float16,0,0.02640533447265625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,0,1,float16,float16,0,0.0435146689414978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,128,1,float16,fp8,0,0.02609066665172577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,128,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,0,1,float16,fp8,0,0.04182399809360504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,0,1,fp8,fp8,0,0.03959999978542328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,128,1,float16,float16,0,0.025418666501839954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,0,1,float16,float16,0,0.04215999941031138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,128,1,float16,fp8,0,0.025290665527184803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,128,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,0,1,float16,fp8,0,0.042133331298828125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,0,1,fp8,fp8,0,0.04172799984614054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,128,1,float16,float16,0,0.023706667125225067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,0,1,float16,float16,0,0.04162133236726125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,128,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,128,1,fp8,fp8,0,0.023413332800070446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,0,1,float16,fp8,0,0.042261332273483276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,0,1,fp8,fp8,0,0.03957866628964742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,128,1,float16,float16,0,0.025487999121348064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,0,1,float16,float16,0,0.04185600082079569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,128,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,128,1,fp8,fp8,0,0.023685333629449207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,0,1,float16,fp8,0,0.04171733558177948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,0,1,fp8,fp8,0,0.039477333426475525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,128,1,float16,float16,0,0.023397333920001984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,0,1,float16,float16,0,0.04182399809360504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,128,1,float16,fp8,0,0.023408000667889912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,128,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,0,1,float16,fp8,0,0.04204266766707102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,0,1,fp8,fp8,0,0.03965866565704346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,128,1,float16,float16,0,0.20572266976038614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,0,1,float16,float16,0,0.26607465744018555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,128,1,float16,fp8,0,0.20587732394536337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,128,1,fp8,fp8,0,0.19741332530975342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,0,1,float16,fp8,0,0.26631999015808105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,0,1,fp8,fp8,0,0.24922666947046915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,128,1,float16,float16,0,0.21490132808685303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,128,1,float16,fp8,0,0.21278399229049683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,0,1,float16,float16,0,0.27491732438405353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,128,1,fp8,fp8,0,0.20336000124613443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,0,1,fp8,fp8,0,0.2564479907353719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,0,1,float16,fp8,0,0.27290666103363037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,128,1,float16,float16,0,0.11959999799728394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,128,1,float16,fp8,0,0.1200213332970937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,128,1,fp8,fp8,0,0.11897599697113037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,0,1,float16,float16,0,0.15281066298484802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,0,1,float16,fp8,0,0.1527733306090037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,128,1,float16,float16,0,0.10248532891273499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,0,1,fp8,fp8,0,0.1495733360449473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,128,1,float16,fp8,0,0.10478400190671285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,128,1,fp8,fp8,0,0.09657067060470581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,0,1,float16,float16,0,0.13486933708190918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,0,1,float16,fp8,0,0.13820800185203552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,128,1,float16,float16,0,0.10648000240325928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,0,1,fp8,fp8,0,0.12854400277137756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,128,1,float16,fp8,0,0.10829333464304607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,128,1,fp8,fp8,0,0.1055626670519511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,0,1,float16,float16,0,0.139984001715978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,128,1,float16,float16,0,0.06414933502674103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,0,1,float16,fp8,0,0.1399893363316854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,0,1,fp8,fp8,0,0.13591999808947244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,0,1,float16,float16,0,0.08521067102750142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,128,1,float16,fp8,0,0.06470400094985962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,128,1,fp8,fp8,0,0.06242666641871134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,0,1,float16,fp8,0,0.08480532964070638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,128,1,float16,float16,0,0.059994667768478394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,0,1,fp8,fp8,0,0.08056533336639404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,0,1,float16,float16,0,0.0816480020682017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,128,1,float16,fp8,0,0.06014933188756307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,128,1,fp8,fp8,0,0.05638400216897329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,0,1,float16,fp8,0,0.0816480020682017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,128,1,float16,float16,0,0.06214400132497152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,0,1,fp8,fp8,0,0.07494933406511943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,0,1,float16,float16,0,0.08295466502507527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,128,1,float16,fp8,0,0.06223999957243601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,128,1,fp8,fp8,0,0.05950933198134104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,128,1,float16,float16,0,0.04247466723124186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,0,1,float16,fp8,0,0.08293333152929942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,0,1,fp8,fp8,0,0.0786240001519521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,0,1,float16,float16,0,0.05382933219273885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,128,1,float16,fp8,0,0.04366933306058248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,0,1,float16,fp8,0,0.055919999877611794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,128,1,fp8,fp8,0,0.04041599979003271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,0,1,fp8,fp8,0,0.051813334226608276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,128,1,float16,float16,0,0.03980266551176707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,0,1,float16,float16,0,0.052101333936055504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,128,1,float16,fp8,0,0.04114133367935816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,128,1,fp8,fp8,0,0.037903999288876854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,0,1,float16,fp8,0,0.05409599840641022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,0,1,fp8,fp8,0,0.04987733562787374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,128,1,float16,float16,0,0.04124266654253006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,0,1,float16,float16,0,0.052416001756985985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,128,1,float16,fp8,0,0.041722665230433144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,128,1,fp8,fp8,0,0.03982399900754293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,0,1,float16,fp8,0,0.05386666456858317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,0,1,fp8,fp8,0,0.05106133222579956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,128,1,float16,float16,0,0.029290666182835896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,0,1,float16,float16,0,0.0399893323580424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,128,1,fp8,fp8,0,0.02755733331044515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,128,1,float16,fp8,0,0.029605334003766377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,0,1,float16,fp8,0,0.03994133323431015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,0,1,fp8,fp8,0,0.0390079990029335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,128,1,float16,float16,0,0.027701333165168762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,0,1,float16,float16,0,0.03773866593837738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,128,1,float16,fp8,0,0.027850667635599773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,128,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,0,1,float16,fp8,0,0.03827733298142751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,0,1,fp8,fp8,0,0.03583999971548716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,128,1,float16,float16,0,0.02752000093460083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,0,1,float16,float16,0,0.03949866692225138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,128,1,fp8,fp8,0,0.027130665878454845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,128,1,float16,fp8,0,0.02758399893840154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,0,1,float16,fp8,0,0.03973866750796636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,0,1,fp8,fp8,0,0.03750933210055033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,128,1,float16,float16,0,0.024869332710901897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,0,1,float16,float16,0,0.03549333413441976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,128,1,float16,fp8,0,0.025583999852339428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,128,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,0,1,float16,fp8,0,0.035487999518712364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,0,1,fp8,fp8,0,0.0354720006386439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,128,1,float16,float16,0,0.025392000873883564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,0,1,float16,float16,0,0.035599999129772186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,128,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,128,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,0,1,float16,fp8,0,0.03551466763019562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,0,1,fp8,fp8,0,0.033530667424201965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,128,1,float16,float16,0,0.02347733328739802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,0,1,float16,float16,0,0.03569599986076355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,128,1,float16,fp8,0,0.02332799881696701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,128,1,fp8,fp8,0,0.023605334262053173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,0,1,float16,fp8,0,0.037231999138991036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,0,1,fp8,fp8,0,0.03368533402681351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,128,1,float16,float16,0,0.02327999969323476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,0,1,float16,float16,0,0.035418666899204254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,128,1,float16,fp8,0,0.023391999304294586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,128,1,fp8,fp8,0,0.02310933421055476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,0,1,float16,fp8,0,0.03547733277082443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,0,1,fp8,fp8,0,0.031957333286603294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,128,1,float16,float16,0,0.023413332800070446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,0,1,float16,float16,0,0.033359999457995095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,128,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,128,1,fp8,fp8,0,0.023226665953795116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,0,1,float16,fp8,0,0.033333333830038704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,0,1,fp8,fp8,0,0.03267733256022135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,128,1,float16,float16,0,0.02311466634273529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,0,1,float16,float16,0,0.033514666060606636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,128,1,float16,fp8,0,0.02330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,128,1,fp8,fp8,0,0.021589333812395733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,0,1,float16,fp8,0,0.03363200028737386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,0,1,fp8,fp8,0,0.03341866781314214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,128,1,float16,float16,0,0.20843732357025146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,0,1,float16,float16,0,0.2500320076942444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,128,1,float16,fp8,0,0.20881066719690958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,128,1,fp8,fp8,0,0.19550933440526327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,0,1,float16,fp8,0,0.24619734287261963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,0,1,fp8,fp8,0,0.23248533407847086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,128,1,float16,float16,0,0.2132906715075175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,128,1,float16,fp8,0,0.21337066094080606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,0,1,float16,float16,0,0.2511253356933594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,128,1,fp8,fp8,0,0.20803733666737875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,0,1,float16,fp8,0,0.2509066661198934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,0,1,fp8,fp8,0,0.24080000321070352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,128,1,float16,float16,0,0.12121599912643433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,128,1,float16,fp8,0,0.1197760005791982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,0,1,float16,float16,0,0.14433067043622336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,128,1,fp8,fp8,0,0.12052800258000691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,0,1,float16,fp8,0,0.14283733566602072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,0,1,fp8,fp8,0,0.1402506629625956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,128,1,float16,float16,0,0.11127466956774394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,128,1,float16,fp8,0,0.1111306647459666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,0,1,float16,float16,0,0.1346453328927358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,128,1,fp8,fp8,0,0.10598933696746826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,0,1,float16,fp8,0,0.13496533036231995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,128,1,float16,float16,0,0.11543466647466023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,0,1,fp8,fp8,0,0.12665067116419473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,0,1,float16,float16,0,0.1372160017490387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,128,1,fp8,fp8,0,0.11414399743080139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,0,1,float16,fp8,0,0.13609600067138672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,128,1,float16,fp8,0,0.11425066987673442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,0,1,fp8,fp8,0,0.1328266660372416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,128,1,float16,float16,0,0.06636266907056172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,0,1,float16,float16,0,0.07845866680145264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,128,1,float16,fp8,0,0.06840000053246816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,128,1,fp8,fp8,0,0.06609066824118297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,0,1,float16,fp8,0,0.07866133252779643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,0,1,fp8,fp8,0,0.07648000121116638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,128,1,float16,float16,0,0.06447466711203258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,0,1,float16,float16,0,0.07478400071461995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,128,1,float16,fp8,0,0.06407999992370605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,128,1,fp8,fp8,0,0.06195199986298879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,0,1,fp8,fp8,0,0.07196266452471416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,0,1,float16,fp8,0,0.07608533402283986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,128,1,float16,float16,0,0.06431999802589417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,0,1,float16,float16,0,0.07684800028800964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,128,1,float16,fp8,0,0.0644053320089976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,0,1,float16,fp8,0,0.07554133236408234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,128,1,fp8,fp8,0,0.06253333389759064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,0,1,fp8,fp8,0,0.0731573353211085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,128,1,float16,float16,0,0.04203199843565623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,0,1,float16,float16,0,0.0498986691236496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,128,1,float16,fp8,0,0.04178666571776072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,128,1,fp8,fp8,0,0.041989331444104515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,0,1,float16,fp8,0,0.05003733436266581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,0,1,fp8,fp8,0,0.05026666820049286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,128,1,float16,float16,0,0.03979733337958654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,0,1,float16,float16,0,0.04990933338801066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,128,1,float16,fp8,0,0.041936000188191734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,128,1,fp8,fp8,0,0.03969600051641464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,0,1,float16,fp8,0,0.04981866478919983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,0,1,fp8,fp8,0,0.04771733283996582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,128,1,float16,float16,0,0.0418453315893809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,0,1,float16,float16,0,0.0497920016447703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,128,1,float16,fp8,0,0.04148799926042557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,128,1,fp8,fp8,0,0.0400693342089653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,0,1,float16,fp8,0,0.049653331438700356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,0,1,fp8,fp8,0,0.049786667029062905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,128,1,float16,float16,0,0.02752533306678136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,0,1,float16,float16,0,0.033376000821590424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,128,1,float16,fp8,0,0.027637332677841187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,128,1,fp8,fp8,0,0.027488000690937042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,0,1,float16,fp8,0,0.03339199970165888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,0,1,fp8,fp8,0,0.03350399931271871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,128,1,float16,float16,0,0.02701333413521449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,0,1,float16,float16,0,0.033285332222779594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,128,1,float16,fp8,0,0.027589333554108936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,128,1,fp8,fp8,0,0.025568000972270966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,0,1,float16,fp8,0,0.03340800106525421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,0,1,fp8,fp8,0,0.03143999973932902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,128,1,float16,float16,0,0.027786667148272198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,0,1,float16,float16,0,0.03169066707293192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,128,1,float16,fp8,0,0.027317332724730175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,128,1,fp8,fp8,0,0.027471999327341717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,0,1,float16,fp8,0,0.03158933420976003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,0,1,fp8,fp8,0,0.03143999973932902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,128,1,float16,float16,0,0.023157333334287006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,0,1,float16,float16,0,0.029194665451844532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,128,1,float16,fp8,0,0.023418667415777843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,128,1,fp8,fp8,0,0.02288000037272771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,0,1,float16,fp8,0,0.02940266579389572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,0,1,fp8,fp8,0,0.029258665939172108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,128,1,float16,float16,0,0.0233599990606308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,0,1,float16,float16,0,0.029674666623274486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,128,1,float16,fp8,0,0.023242667317390442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,128,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,0,1,float16,fp8,0,0.029290666182835896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,0,1,fp8,fp8,0,0.02942399928967158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,128,1,float16,float16,0,0.023546665906906128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,0,1,float16,float16,0,0.029445332785447437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,128,1,float16,fp8,0,0.023589332898457844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,128,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,0,1,float16,fp8,0,0.02958933264017105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,0,1,fp8,fp8,0,0.02773866554101308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,128,1,float16,float16,0,0.022554665803909302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,0,1,float16,float16,0,0.02773866554101308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,128,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,128,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,0,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,0,1,fp8,fp8,0,0.02775466690460841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,128,1,float16,float16,0,0.021040000021457672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,0,1,float16,float16,0,0.027488000690937042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,128,1,float16,fp8,0,0.02274133265018463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,0,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,0,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,128,1,float16,float16,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,0,1,float16,float16,0,0.027258666853109997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,128,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,128,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,0,1,float16,fp8,0,0.02757866680622101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,0,1,fp8,fp8,0,0.02722666660944621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,128,1,float16,float16,0,0.02160533269246419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,0,1,float16,float16,0,0.02714666724205017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,128,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,128,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,0,1,float16,fp8,0,0.02717333287000656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,0,1,fp8,fp8,0,0.027237333357334137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,128,1,float16,float16,0,0.021029333273569744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,0,1,float16,float16,0,0.0271573339899381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,128,1,float16,fp8,0,0.02149333308140437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,128,1,fp8,fp8,0,0.019386666516462963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,0,1,float16,fp8,0,0.027130665878454845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,0,1,fp8,fp8,0,0.02738133321205775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,128,1,float16,float16,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,0,1,float16,float16,0,0.02720000098148982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,128,1,float16,fp8,0,0.02162133405605952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,128,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,0,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,0,1,fp8,fp8,0,0.025621332228183746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,128,1,float16,float16,0,0.204474667708079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,0,1,float16,float16,0,0.20824533700942993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,128,1,float16,fp8,0,0.20243199666341147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,128,1,fp8,fp8,0,0.1927786668141683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,0,1,float16,fp8,0,0.20616000890731812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,0,1,fp8,fp8,0,0.19340266784032187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,128,1,float16,float16,0,0.20689600706100464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,0,1,float16,float16,0,0.21226133902867636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,128,1,float16,fp8,0,0.20361065864562988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,128,1,fp8,fp8,0,0.1983413298924764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,0,1,float16,fp8,0,0.20784000555674234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,128,1,float16,float16,0,0.11659199992815654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,0,1,float16,float16,0,0.12038933237393697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,0,1,fp8,fp8,0,0.20138132572174072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,128,1,float16,fp8,0,0.1147093375523885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,128,1,fp8,fp8,0,0.11619200309117635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,0,1,float16,fp8,0,0.11813867092132568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,0,1,fp8,fp8,0,0.11607467134793599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,128,1,float16,float16,0,0.10941333572069804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,0,1,float16,float16,0,0.11068800091743469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,128,1,float16,fp8,0,0.10894933342933655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,128,1,fp8,fp8,0,0.1030613382657369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,0,1,float16,fp8,0,0.11114133397738139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,0,1,fp8,fp8,0,0.10523200035095215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,128,1,float16,float16,0,0.11353066563606262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,0,1,float16,float16,0,0.11384532848993938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,128,1,float16,fp8,0,0.10992532968521118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,128,1,fp8,fp8,0,0.1116426686445872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,0,1,float16,fp8,0,0.11311466495196025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,0,1,fp8,fp8,0,0.11189333597819011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,128,1,float16,float16,0,0.06634666522343953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,128,1,float16,fp8,0,0.06421866516272227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,0,1,float16,float16,0,0.06613866488138835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,128,1,fp8,fp8,0,0.06459733347098033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,0,1,float16,fp8,0,0.06457066535949707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,0,1,fp8,fp8,0,0.06233599781990051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,128,1,float16,float16,0,0.06214933097362518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,0,1,float16,float16,0,0.06332266827424367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,128,1,float16,fp8,0,0.06241600215435028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,128,1,fp8,fp8,0,0.05834133426348368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,0,1,float16,fp8,0,0.06274133423964183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,0,1,fp8,fp8,0,0.06035199761390686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,128,1,float16,float16,0,0.06366399923960368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,0,1,float16,float16,0,0.06238933404286703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,128,1,float16,fp8,0,0.06270933151245117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,128,1,fp8,fp8,0,0.06061333417892456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,0,1,float16,fp8,0,0.06216000020503998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,128,1,float16,float16,0,0.041797334949175514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,0,1,fp8,fp8,0,0.060191998879114784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,0,1,float16,float16,0,0.041696002086003624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,128,1,float16,fp8,0,0.041738669077555336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,128,1,fp8,fp8,0,0.03980266551176707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,0,1,float16,fp8,0,0.04180799921353658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,0,1,fp8,fp8,0,0.041802664597829185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,128,1,float16,float16,0,0.039887999494870506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,0,1,float16,float16,0,0.04189866781234741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,128,1,float16,fp8,0,0.039813332259655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,128,1,fp8,fp8,0,0.039477333426475525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,0,1,float16,fp8,0,0.041893333196640015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,0,1,fp8,fp8,0,0.04142399877309799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,128,1,float16,float16,0,0.041573333243529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,0,1,float16,float16,0,0.04232533276081085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,128,1,float16,fp8,0,0.041850666205088295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,128,1,fp8,fp8,0,0.03961600114901861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,0,1,float16,fp8,0,0.042037333051363625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,0,1,fp8,fp8,0,0.03988266736268997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,128,1,float16,float16,0,0.0276053324341774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,0,1,float16,float16,0,0.027471999327341717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,128,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,128,1,fp8,fp8,0,0.027327999472618103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,0,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,0,1,fp8,fp8,0,0.02972800036271413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,128,1,float16,float16,0,0.027456000447273254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,0,1,float16,float16,0,0.027552001178264618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,128,1,fp8,fp8,0,0.02537599951028824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,128,1,float16,fp8,0,0.02741333345572154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,0,1,float16,fp8,0,0.02717333287000656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,0,1,fp8,fp8,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,128,1,float16,float16,0,0.027669332921504974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,0,1,float16,float16,0,0.02740799884001414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,128,1,float16,fp8,0,0.02735466758410136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,128,1,fp8,fp8,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,0,1,fp8,fp8,0,0.027376001079877216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,0,1,float16,fp8,0,0.02849599967400233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,128,1,float16,float16,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,128,1,float16,fp8,0,0.023530667026837666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,128,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,0,1,float16,fp8,0,0.025642665723959606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,0,1,fp8,fp8,0,0.023354666928450268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,0,1,float16,float16,0,0.0233599990606308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,128,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,128,1,fp8,fp8,0,0.021802666286627453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,0,1,float16,fp8,0,0.023546665906906128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,128,1,float16,float16,0,0.023183998962243397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,0,1,fp8,fp8,0,0.023045333723227184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,0,1,float16,float16,0,0.02332266668478648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,128,1,float16,fp8,0,0.0232640008131663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,128,1,fp8,fp8,0,0.023013333479563396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,0,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,0,1,fp8,fp8,0,0.02369600037733714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,128,1,float16,float16,0,0.022853332261244457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,0,1,float16,float16,0,0.02316266546646754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,128,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,128,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,0,1,fp8,fp8,0,0.021525333325068157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,128,1,float16,float16,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,0,1,float16,float16,0,0.02312533309062322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,128,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,128,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,0,1,float16,fp8,0,0.02292799949645996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,0,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,128,1,float16,float16,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,0,1,float16,float16,0,0.023061332603295643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,128,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,128,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,0,1,fp8,fp8,0,0.02348266790310542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,128,1,float16,float16,0,0.021269333859284718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,0,1,float16,float16,0,0.02089600016673406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,128,1,float16,fp8,0,0.021557333568731945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,128,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,0,1,float16,fp8,0,0.02143999934196472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,0,1,fp8,fp8,0,0.02143466720978419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,128,1,float16,float16,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,0,1,float16,float16,0,0.021327999730904896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,128,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,128,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,0,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,0,1,fp8,fp8,0,0.021429332594076794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,128,1,float16,float16,0,0.021242665747801464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,0,1,float16,float16,0,0.021402666966120403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,128,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,128,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,0,1,float16,fp8,0,0.021712000171343487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,0,1,fp8,fp8,0,0.021221332252025604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,128,1,float16,float16,0,0.02094399929046631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,0,1,float16,float16,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,128,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,128,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,0,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,0,1,fp8,fp8,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,128,1,float16,float16,0,0.020986666282018025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,128,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,128,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,0,1,float16,fp8,0,0.02142400046189626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,0,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,128,1,float16,float16,0,0.021029333273569744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,0,1,float16,float16,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,128,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,128,1,fp8,fp8,0,0.019658666104078293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,0,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,128,1,float16,float16,0,0.10099200407663982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,0,1,float16,float16,0,0.09948266545931499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,128,1,float16,fp8,0,0.10102933645248413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,128,1,fp8,fp8,0,0.09504000345865886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,0,1,float16,fp8,0,0.0978559950987498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,0,1,fp8,fp8,0,0.09351999560991923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,0,1,float16,float16,0,0.10019200046857198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,128,1,float16,float16,0,0.10436266660690308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,128,1,float16,fp8,0,0.10307199756304423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,0,1,float16,fp8,0,0.099263995885849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,128,1,fp8,fp8,0,0.1033066709836324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,0,1,fp8,fp8,0,0.10001599788665771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,128,1,float16,float16,0,0.060640002290407814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,0,1,float16,float16,0,0.059690664211908974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,128,1,float16,fp8,0,0.05963733295599619
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,0,1,float16,fp8,0,0.058229332168896995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,128,1,fp8,fp8,0,0.06035199761390686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,0,1,fp8,fp8,0,0.05921066800753275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,128,1,float16,float16,0,0.057962665955225624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,0,1,float16,float16,0,0.05612266560395559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,128,1,float16,fp8,0,0.058117335041364036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,128,1,fp8,fp8,0,0.05597866574923197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,0,1,float16,fp8,0,0.056015998125076294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,0,1,fp8,fp8,0,0.053818667928377785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,128,1,float16,float16,0,0.05821866790453593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,0,1,float16,float16,0,0.05821866790453593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,128,1,float16,fp8,0,0.05830933153629303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,128,1,fp8,fp8,0,0.05675200124581655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,0,1,float16,fp8,0,0.05690666536490122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,0,1,fp8,fp8,0,0.05554133156935374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,128,1,float16,float16,0,0.039488000174363456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,0,1,float16,float16,0,0.037802666425704956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,128,1,fp8,fp8,0,0.03737599899371465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,128,1,float16,fp8,0,0.040037333965301514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,0,1,float16,fp8,0,0.03751466671625773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,0,1,fp8,fp8,0,0.037434667348861694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,128,1,float16,float16,0,0.03746666759252548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,0,1,float16,float16,0,0.03549333413441976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,128,1,float16,fp8,0,0.03750933210055033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,128,1,fp8,fp8,0,0.035717333356539406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,0,1,float16,fp8,0,0.0373333344856898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,0,1,fp8,fp8,0,0.03540800015131632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,0,1,float16,float16,0,0.035562666753927864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,128,1,float16,float16,0,0.037903999288876854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,128,1,float16,fp8,0,0.03822933385769526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,128,1,fp8,fp8,0,0.037477334340413414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,0,1,float16,fp8,0,0.03750933210055033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,0,1,fp8,fp8,0,0.03581333408753077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,128,1,float16,float16,0,0.02606933315594991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,0,1,float16,float16,0,0.025546667476495106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,128,1,float16,fp8,0,0.02696000039577484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,128,1,fp8,fp8,0,0.02550933261712392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,0,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,0,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,128,1,float16,float16,0,0.025205334027608235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,0,1,float16,float16,0,0.023397333920001984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,128,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,128,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,0,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,0,1,fp8,fp8,0,0.023669332265853882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,128,1,float16,float16,0,0.02521066615978877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,0,1,float16,float16,0,0.02350933353106181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,128,1,float16,fp8,0,0.024933333198229473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,128,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,0,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,0,1,fp8,fp8,0,0.02372266600529353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,128,1,float16,float16,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,128,1,float16,fp8,0,0.021722666919231415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,128,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,0,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,0,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,128,1,float16,float16,0,0.02149333308140437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,0,1,float16,float16,0,0.02117866774400075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,128,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,128,1,fp8,fp8,0,0.021514666577180225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,0,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,0,1,fp8,fp8,0,0.02107200026512146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,128,1,float16,float16,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,0,1,float16,float16,0,0.019733333339293797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,128,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,128,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,0,1,float16,fp8,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,0,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,128,1,float16,float16,0,0.019519999623298645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,128,1,float16,fp8,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,128,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,0,1,float16,fp8,0,0.021290667355060577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,128,1,float16,float16,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,0,1,float16,float16,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,128,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,128,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,0,1,float16,fp8,0,0.01945066700379054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,128,1,float16,float16,0,0.019167999426523846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,0,1,float16,float16,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,128,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,128,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,0,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,128,1,float16,float16,0,0.019152000546455383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,0,1,float16,float16,0,0.019248000035683315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,128,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,0,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,0,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,128,1,float16,float16,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,128,1,float16,fp8,0,0.019546666493018467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,128,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,0,1,float16,fp8,0,0.01937599976857503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,128,1,float16,float16,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,0,1,float16,float16,0,0.019354666272799175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,128,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,128,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,0,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,128,1,float16,float16,0,0.01929066702723503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,0,1,float16,float16,0,0.018735999862353008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,128,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,128,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,0,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,128,1,float16,float16,0,0.019013332823912304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,0,1,float16,float16,0,0.019173332800467808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,128,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,128,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,0,1,float16,fp8,0,0.01923199991385142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,128,1,float16,float16,0,0.019093333433071773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,128,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,0,1,float16,fp8,0,0.018858666221300762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,128,1,float16,float16,0,0.019061333189407986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,0,1,float16,float16,0,0.018863999595244724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,128,1,float16,fp8,0,0.019413333386182785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,128,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,0,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,0,1,fp8,fp8,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,128,1,float16,float16,0,0.019173332800467808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,0,1,float16,float16,0,0.01929066702723503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,128,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,128,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,0,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,128,1,float16,float16,0,0.018746666610240936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,0,1,float16,float16,0,0.01937599976857503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,128,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,0,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,0,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,0,1,float16,float16,0,0.058176000912984215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,128,1,float16,fp8,0,0.059487998485565186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,0,1,float16,fp8,0,0.060165335734685264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,128,1,fp8,fp8,0,0.05596266686916351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,0,1,fp8,fp8,0,0.05649066468079885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,128,1,float16,float16,0,0.06018666426340739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,0,1,float16,float16,0,0.06001600126425425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,128,1,float16,fp8,0,0.05881600081920624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,128,1,fp8,fp8,0,0.05606399973233541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,0,1,float16,fp8,0,0.058090666929880776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,128,1,float16,float16,0,0.039994666973749794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,0,1,float16,float16,0,0.03952533255020777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,0,1,fp8,fp8,0,0.058362667759259544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,128,1,float16,fp8,0,0.03978666663169861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,128,1,fp8,fp8,0,0.03811733424663544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,0,1,float16,fp8,0,0.04027199993530909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,0,1,fp8,fp8,0,0.03794133414824804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,128,1,float16,float16,0,0.03775466730197271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,0,1,float16,float16,0,0.03979733337958654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,128,1,float16,fp8,0,0.039834665755430855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,128,1,fp8,fp8,0,0.03769599894682566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,0,1,float16,fp8,0,0.039520000418027244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,0,1,fp8,fp8,0,0.03760000069936117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,128,1,float16,float16,0,0.03961066653331121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,0,1,float16,float16,0,0.037791999677817024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,128,1,float16,fp8,0,0.03769599894682566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,128,1,fp8,fp8,0,0.03619199991226196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,0,1,fp8,fp8,0,0.036464000741640724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,0,1,float16,fp8,0,0.038165333370367684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,128,1,float16,float16,0,0.025477332373460133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,0,1,float16,float16,0,0.027248000105222065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,128,1,float16,fp8,0,0.02601066728432973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,128,1,fp8,fp8,0,0.02714666724205017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,0,1,float16,fp8,0,0.027376001079877216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,0,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,128,1,float16,float16,0,0.02573866645495097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,0,1,float16,float16,0,0.025962665677070618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,128,1,float16,fp8,0,0.027072000006834667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,128,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,0,1,float16,fp8,0,0.02625600000222524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,0,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,128,1,float16,float16,0,0.02749866743882497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,0,1,float16,float16,0,0.025637333591779072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,128,1,float16,fp8,0,0.025392000873883564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,128,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,0,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,128,1,float16,float16,0,0.019061333189407986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,0,1,fp8,fp8,0,0.025072000920772552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,0,1,float16,float16,0,0.018901333212852478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,128,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,128,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,0,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,128,1,float16,float16,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,0,1,fp8,fp8,0,0.019482667247454327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,0,1,float16,float16,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,128,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,128,1,fp8,fp8,0,0.019434666881958645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,0,1,fp8,fp8,0,0.019637333850065868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,128,1,float16,float16,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,0,1,float16,float16,0,0.01897066707412402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,128,1,float16,fp8,0,0.018735999862353008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,128,1,fp8,fp8,0,0.019546666493018467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,128,1,float16,float16,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,0,1,float16,float16,0,0.018565333137909572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,128,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,128,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,0,1,float16,fp8,0,0.017573333034912746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,128,1,float16,float16,0,0.01684800038735072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,0,1,fp8,fp8,0,0.017866666118303936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,128,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,128,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,0,1,float16,fp8,0,0.017711999515692394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,128,1,float16,float16,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,0,1,float16,float16,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,128,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,0,1,float16,fp8,0,0.018645333747069042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,128,1,fp8,fp8,0,0.017722666263580322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,128,1,float16,float16,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,128,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,0,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,0,1,float16,float16,0,0.01623999948302905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,128,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,128,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,128,1,float16,float16,0,0.016650666793187458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,128,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,128,1,float16,float16,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,128,1,float16,fp8,0,0.017583999782800674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,128,1,float16,float16,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,128,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,0,1,fp8,fp8,0,0.015461333096027374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,128,1,float16,float16,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,0,1,float16,float16,0,0.01623999948302905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,128,1,fp8,fp8,0,0.016602666427691776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,128,1,float16,float16,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,128,1,fp8,fp8,0,0.01616000011563301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,128,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,0,1,fp8,fp8,0,0.01573866605758667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,128,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,128,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,128,1,float16,float16,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,128,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,128,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,0,1,fp8,fp8,0,0.01581866666674614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,128,1,float16,float16,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,128,1,float16,fp8,0,0.016565332810084026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,0,1,fp8,fp8,0,0.015370666980743408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,128,1,float16,float16,0,0.045978665351867676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,128,1,float16,fp8,0,0.04619733492533366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,0,1,float16,float16,0,0.0459199994802475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,128,1,fp8,fp8,0,0.043791999419530235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,0,1,float16,fp8,0,0.04637333254019419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,0,1,fp8,fp8,0,0.04393066465854645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,128,1,float16,float16,0,0.048170665899912514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,0,1,float16,float16,0,0.04780266682306925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,128,1,float16,fp8,0,0.04809066653251648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,128,1,fp8,fp8,0,0.04552533229192098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,0,1,float16,fp8,0,0.04785599807898203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,0,1,fp8,fp8,0,0.04378133515516917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,128,1,float16,float16,0,0.03197866678237915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,0,1,float16,float16,0,0.03150933235883713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,128,1,float16,fp8,0,0.031386665999889374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,0,1,float16,fp8,0,0.03162133445342382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,128,1,fp8,fp8,0,0.03107733279466629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,0,1,fp8,fp8,0,0.029487999776999157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,128,1,float16,float16,0,0.029765332738558452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,0,1,float16,float16,0,0.031290667752424874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,128,1,float16,fp8,0,0.029365333418051403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,128,1,fp8,fp8,0,0.029333333174387615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,0,1,float16,fp8,0,0.030693332354227703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,0,1,fp8,fp8,0,0.02945599953333537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,0,1,float16,float16,0,0.03140799949566523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,128,1,float16,float16,0,0.03181333343187968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,128,1,float16,fp8,0,0.03160000095764796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,0,1,float16,fp8,0,0.031141333281993866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,128,1,fp8,fp8,0,0.02942933390537898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,0,1,fp8,fp8,0,0.029631999631722767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,0,1,float16,float16,0,0.023215999205907185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,128,1,float16,fp8,0,0.021722666919231415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,128,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,128,1,float16,float16,0,0.023397333920001984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,0,1,float16,fp8,0,0.0232640008131663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,0,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,128,1,float16,float16,0,0.020981334149837494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,0,1,float16,float16,0,0.02223466585079829
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,128,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,128,1,fp8,fp8,0,0.021429332594076794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,0,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,128,1,float16,float16,0,0.02147199958562851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,0,1,fp8,fp8,0,0.02216533323129018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,0,1,float16,float16,0,0.02141333371400833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,128,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,0,1,fp8,fp8,0,0.02181866765022278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,128,1,fp8,fp8,0,0.02162133405605952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,128,1,float16,float16,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,0,1,float16,float16,0,0.018768000106016796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,128,1,fp8,fp8,0,0.01841066653529803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,0,1,float16,fp8,0,0.018858666221300762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,0,1,float16,float16,0,0.017456000049908955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,0,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,0,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,128,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,128,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,0,1,float16,fp8,0,0.017573333034912746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,128,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,128,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,0,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,128,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,128,1,float16,float16,0,0.015413332730531693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,0,1,fp8,fp8,0,0.015552000453074774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,128,1,float16,float16,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,128,1,float16,fp8,0,0.017498667041460674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,128,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,0,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,128,1,fp8,fp8,0,0.01766933376590411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,0,1,float16,float16,0,0.01764800027012825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,128,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,128,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,128,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,0,1,float16,float16,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,0,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,0,1,float16,float16,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,128,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,128,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,0,1,fp8,fp8,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,128,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,128,1,float16,float16,0,0.01543466622630755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,128,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,0,1,float16,float16,0,0.01543466622630755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,128,1,fp8,fp8,0,0.015637333194414776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,0,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,128,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,128,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,0,1,float16,fp8,0,0.01584533353646596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,128,1,float16,float16,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,0,1,float16,float16,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,128,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,128,1,fp8,fp8,0,0.015647999942302704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,0,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,128,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,128,1,float16,float16,0,0.01642666632930438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,128,1,float16,fp8,0,0.015397333850463232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,0,1,float16,fp8,0,0.015647999942302704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,0,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,128,1,float16,float16,0,0.03774933268626531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,0,1,float16,float16,0,0.03833599885304769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,128,1,float16,fp8,0,0.04042666653792063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,128,1,fp8,fp8,0,0.035504000882307686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,0,1,float16,fp8,0,0.03950933367013931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,0,1,fp8,fp8,0,0.03621866554021835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,128,1,float16,float16,0,0.039701332648595176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,0,1,float16,float16,0,0.03997333347797394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,128,1,float16,fp8,0,0.039647998909155525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,128,1,fp8,fp8,0,0.03782933453718821
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,0,1,float16,fp8,0,0.03965333352486292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,128,1,float16,float16,0,0.02738133321205775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,0,1,float16,float16,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,0,1,fp8,fp8,0,0.03714133302370707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,128,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,128,1,fp8,fp8,0,0.027024000883102417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,0,1,float16,fp8,0,0.02733866622050603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,0,1,fp8,fp8,0,0.025301332275072735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,128,1,float16,float16,0,0.027610667049884796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,0,1,float16,float16,0,0.02773333340883255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,128,1,float16,fp8,0,0.02626666675011317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,128,1,fp8,fp8,0,0.02569599946339925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,0,1,float16,fp8,0,0.02736533433198929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,0,1,fp8,fp8,0,0.025557334224383037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,128,1,float16,float16,0,0.02743999908367793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,128,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,128,1,fp8,fp8,0,0.02568000058333079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,0,1,float16,fp8,0,0.027674667537212372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,128,1,float16,float16,0,0.019280000279347103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,0,1,fp8,fp8,0,0.025450666745503742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,0,1,float16,float16,0,0.019354666272799175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,128,1,float16,fp8,0,0.020560000091791153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,128,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,0,1,float16,fp8,0,0.019813333948453266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,0,1,fp8,fp8,0,0.01958400011062622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,128,1,float16,float16,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,0,1,float16,float16,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,128,1,float16,fp8,0,0.020842666427294414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,128,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,0,1,fp8,fp8,0,0.02037866661945979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,128,1,float16,float16,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,0,1,float16,float16,0,0.019434666881958645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,128,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,128,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,0,1,float16,fp8,0,0.01937599976857503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,128,1,float16,float16,0,0.0164533331990242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,0,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,128,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,0,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,128,1,float16,float16,0,0.01676799977819125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,128,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,128,1,float16,float16,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,0,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,128,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,128,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,0,1,float16,fp8,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,128,1,fp8,fp8,0,0.015930666277805965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,0,1,float16,fp8,0,0.017621333400408428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,128,1,float16,float16,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,0,1,float16,float16,0,0.014831999937693277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,128,1,float16,fp8,0,0.01766933376590411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,128,1,fp8,fp8,0,0.015461333096027374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,0,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,128,1,float16,float16,0,0.015520000209410986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,128,1,float16,float16,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,0,1,fp8,fp8,0,0.015791999797026317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,128,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,128,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,128,1,float16,float16,0,0.01544533297419548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,0,1,float16,float16,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,128,1,float16,float16,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,128,1,float16,fp8,0,0.01595199977358182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,128,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,128,1,float16,float16,0,0.015285332997639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,128,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,128,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,0,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,128,1,float16,float16,0,0.015317333241303762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,128,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,128,1,float16,float16,0,0.016832000265518825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,0,1,float16,float16,0,0.016656000167131424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,128,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,128,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,0,1,float16,float16,0,0.016741332908471424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,128,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,128,1,float16,float16,0,0.016117333124081295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,128,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,128,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,0,1,float16,fp8,0,0.015439999600251516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,128,1,float16,float16,0,0.016538667182127636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,128,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,128,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,0,1,fp8,fp8,0,0.016314666718244553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,128,1,float16,float16,0,0.03513599932193756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,0,1,float16,float16,0,0.03579200059175491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,128,1,float16,fp8,0,0.035418666899204254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,128,1,fp8,fp8,0,0.0335413341720899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,0,1,float16,fp8,0,0.035589332381884255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,0,1,fp8,fp8,0,0.03344533344109853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,128,1,float16,float16,0,0.03557866563399633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,0,1,float16,float16,0,0.03540800015131632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,128,1,float16,fp8,0,0.035786665976047516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,128,1,fp8,fp8,0,0.03356266766786575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,0,1,float16,fp8,0,0.03547733277082443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,0,1,fp8,fp8,0,0.03347733368476232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,128,1,float16,float16,0,0.025466665625572205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,0,1,float16,float16,0,0.02533866713444392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,128,1,float16,fp8,0,0.025221332907676697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,128,1,fp8,fp8,0,0.02550933261712392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,0,1,float16,fp8,0,0.025557334224383037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,0,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,0,1,float16,float16,0,0.025429333249727886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,128,1,float16,fp8,0,0.025386666258176167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,128,1,float16,float16,0,0.026170666019121807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,128,1,fp8,fp8,0,0.02310933421055476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,0,1,float16,fp8,0,0.025573333104451496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,0,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,128,1,float16,float16,0,0.025194667279720306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,0,1,float16,float16,0,0.025797332326571148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,128,1,float16,fp8,0,0.025349333882331848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,0,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,128,1,fp8,fp8,0,0.023552000522613525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,128,1,float16,float16,0,0.01921066641807556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,0,1,fp8,fp8,0,0.025285333395004272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,0,1,float16,float16,0,0.019248000035683315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,128,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,128,1,fp8,fp8,0,0.019567999988794327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,0,1,float16,fp8,0,0.019274666905403137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,0,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,128,1,float16,float16,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,0,1,float16,float16,0,0.018837332725524902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,128,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,128,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,0,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,0,1,fp8,fp8,0,0.019551999866962433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,128,1,float16,float16,0,0.018874666343132656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,128,1,float16,fp8,0,0.020853333175182343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,0,1,float16,fp8,0,0.019487999379634857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,128,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,128,1,fp8,fp8,0,0.01553600033124288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,128,1,fp8,fp8,0,0.017530667285124462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,0,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,128,1,float16,float16,0,0.01600533351302147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,128,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,0,1,fp8,fp8,0,0.016762666404247284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,128,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,128,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,128,1,float16,float16,0,0.014959999670584997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,128,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,128,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,128,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,128,1,float16,fp8,0,0.015568000574906668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,0,1,fp8,fp8,0,0.015386667102575302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,128,1,float16,float16,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,128,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,128,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,0,1,fp8,fp8,0,0.01534933348496755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,128,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,128,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,0,1,fp8,fp8,0,0.018533332894245785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,0,1,float16,float16,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,128,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,128,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,0,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,128,1,float16,float16,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,0,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,128,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,0,1,float16,float16,0,0.015376000354687372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,128,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,0,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,128,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,0,1,fp8,fp8,0,0.015546667079130808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,128,1,fp8,fp8,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,0,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,128,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,128,1,fp8,fp8,0,0.01553600033124288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,0,1,float16,float16,0,0.014965333044528961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,128,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,0,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,128,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,128,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,0,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,0,0.033471999069054924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,0,0.03331733246644338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,0,0.03369600077470144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,64,128,1,fp8,fp8,0,0.031685332457224526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,0,0.03323733309904734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,64,0,1,fp8,fp8,0,0.03163733333349228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,0,0.03325333446264267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,0,0.03331200033426285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,0,0.03372266640265783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,64,128,1,fp8,fp8,0,0.03159466634194056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,0,0.0313226655125618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,64,0,1,fp8,fp8,0,0.03133866687615713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,64,128,1,float16,float16,0,0.023541333774725597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,0,0.02334933231274287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,64,128,1,float16,fp8,0,0.02346133440732956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,64,128,1,fp8,fp8,0,0.02325333406527837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,0,0.023247999449570973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,64,0,1,fp8,fp8,0,0.02329600105683009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,0,0.0234400009115537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,0,0.023221333821614582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,0,0.02330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,64,128,1,fp8,fp8,0,0.02316266546646754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,0,0.025397333006064098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,64,0,1,fp8,fp8,0,0.023445333043734234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,0,0.025040000677108765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,0,0.023370665808518726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,0,0.025093334416548412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,64,128,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,64,0,1,fp8,fp8,0,0.02329600105683009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,64,128,1,float16,float16,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,0,0.019141333798567455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,64,128,1,float16,fp8,0,0.01937599976857503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,64,128,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,0,0.01923199991385142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,64,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,64,128,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,0,0.019354666272799175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,64,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,64,128,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,64,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,64,128,1,float16,float16,0,0.015466666469971338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,64,128,1,float16,fp8,0,0.016714667280515034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,64,128,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,64,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,64,128,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,64,0,1,fp8,fp8,0,0.016586666305859882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,64,128,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,64,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,64,128,1,fp8,fp8,0,0.01674666628241539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,64,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,0,0.015392000476519266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,64,128,1,fp8,fp8,0,0.015552000453074774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,64,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,64,128,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,64,0,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,0,0.015392000476519266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,64,128,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,64,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,0,0.016704000532627106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,64,128,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,0,0.014912000546852747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,0,0.015935999651749928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,64,0,1,fp8,fp8,0,0.01605333387851715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,0,0.014869333555301031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,64,128,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,64,0,1,fp8,fp8,0,0.016538667182127636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,64,128,1,float16,fp8,0,0.014853333433469137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,64,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,0,0.016762666404247284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,0,0.016410666207472484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,0,0.016544000556071598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,64,0,1,fp8,fp8,0,0.015423999478419622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,0,0.016176000237464905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,64,128,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,64,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,64,128,1,float16,float16,0,0.01664000004529953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,64,128,1,float16,fp8,0,0.014842666685581207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,64,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,64,128,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,128,1,float16,float16,0,0.12964266538619995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,128,1,float16,fp8,0,0.1306826670964559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,128,1,fp8,fp8,0,0.12777599692344666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,0,1,float16,float16,0,0.7849493026733398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,128,1,float16,float16,0,0.07896000146865845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,0,1,float16,fp8,0,0.7878613471984863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,0,1,fp8,fp8,0,0.7325440247853597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,128,1,float16,fp8,0,0.08162666857242584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,0,1,float16,float16,0,0.462501327196757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,128,1,fp8,fp8,0,0.08121066788832347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,0,1,float16,fp8,0,0.4638400077819824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,128,1,float16,float16,0,0.07449600100517273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,0,1,fp8,fp8,0,0.43106667200724286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,0,1,float16,float16,0,0.4638719956080119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,128,1,float16,fp8,0,0.07758399844169617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,128,1,fp8,fp8,0,0.07302933434645335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,128,1,float16,float16,0,0.04994133114814758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,0,1,float16,fp8,0,0.4634773333867391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,0,1,float16,float16,0,0.3001546661059062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,0,1,fp8,fp8,0,0.4219839970270793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,128,1,float16,fp8,0,0.051226665576299034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,128,1,fp8,fp8,0,0.04985600213209788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,0,1,float16,fp8,0,0.2990773320198059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,0,1,fp8,fp8,0,0.2765386700630188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,128,1,float16,float16,0,0.048112000028292336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,128,1,float16,fp8,0,0.05022933085759481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,0,1,float16,float16,0,0.2972213427225749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,128,1,fp8,fp8,0,0.04790933430194855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,128,1,float16,float16,0,0.03998400022586187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,0,1,float16,fp8,0,0.30083733797073364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,0,1,fp8,fp8,0,0.27345067262649536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,128,1,float16,fp8,0,0.03988266736268997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,0,1,float16,float16,0,0.203984002272288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,128,1,fp8,fp8,0,0.039642666776975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,128,1,float16,float16,0,0.039887999494870506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,0,1,fp8,fp8,0,0.19153066476186117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,0,1,float16,fp8,0,0.20408000548680624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,128,1,float16,fp8,0,0.039887999494870506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,128,1,fp8,fp8,0,0.0378506655494372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,0,1,float16,float16,0,0.204474667708079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,0,1,fp8,fp8,0,0.18937599658966064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,0,1,float16,fp8,0,0.20512000719706217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,128,1,float16,float16,0,0.10294933120409648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,128,1,float16,fp8,0,0.10549333691596985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,128,1,fp8,fp8,0,0.1013813316822052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,0,1,float16,float16,0,0.4899626572926839
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,128,1,float16,float16,0,0.062122667829195656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,0,1,fp8,fp8,0,0.45578134059906006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,0,1,float16,fp8,0,0.4946133295694987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,128,1,float16,fp8,0,0.06458666423956554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,128,1,fp8,fp8,0,0.062128002444903054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,0,1,float16,float16,0,0.2971573273340861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,128,1,float16,float16,0,0.060234665870666504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,0,1,fp8,fp8,0,0.2765760024388631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,0,1,float16,fp8,0,0.3002133369445801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,128,1,float16,fp8,0,0.06067200005054474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,128,1,fp8,fp8,0,0.05805333455403646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,0,1,float16,float16,0,0.2964106599489848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,128,1,float16,float16,0,0.04437333345413208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,0,1,fp8,fp8,0,0.27406932910283405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,0,1,float16,fp8,0,0.299621323744456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,0,1,float16,float16,0,0.19381332397460938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,128,1,float16,fp8,0,0.04574400186538696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,128,1,fp8,fp8,0,0.04402133325735728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,0,1,float16,fp8,0,0.194815993309021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,128,1,float16,float16,0,0.04359999795754751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,0,1,fp8,fp8,0,0.17941333850224814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,0,1,float16,float16,0,0.19397334257761636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,128,1,float16,fp8,0,0.04414933423201243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,128,1,fp8,fp8,0,0.041802664597829185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,0,1,float16,fp8,0,0.19392534097035727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,0,1,fp8,fp8,0,0.17813332875569662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,128,1,float16,float16,0,0.03607466568549474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,0,1,float16,float16,0,0.15460800131162009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,128,1,float16,fp8,0,0.03536533315976461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,128,1,fp8,fp8,0,0.033733333150545754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,0,1,float16,fp8,0,0.1569439967473348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,0,1,fp8,fp8,0,0.14637333154678345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,128,1,float16,float16,0,0.03369066615899404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,0,1,float16,float16,0,0.15668800473213196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,128,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,128,1,float16,fp8,0,0.03583466758330663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,0,1,float16,fp8,0,0.15588800112406412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,0,1,fp8,fp8,0,0.1469013293584188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,128,1,float16,float16,0,0.088128000497818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,128,1,float16,fp8,0,0.08900800347328186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,128,1,fp8,fp8,0,0.0851200024286906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,0,1,float16,float16,0,0.36720534165700275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,128,1,float16,float16,0,0.05707733333110809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,0,1,float16,fp8,0,0.37222933769226074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,0,1,fp8,fp8,0,0.3381173213322957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,0,1,float16,float16,0,0.23035200436909994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,128,1,float16,fp8,0,0.05872533222039541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,128,1,fp8,fp8,0,0.056133334835370384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,0,1,float16,fp8,0,0.2337119976679484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,0,1,fp8,fp8,0,0.21676800648371378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,128,1,float16,float16,0,0.054373333851496376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,0,1,float16,float16,0,0.2320906718571981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,128,1,float16,fp8,0,0.05630399783452352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,128,1,fp8,fp8,0,0.0544106662273407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,0,1,float16,fp8,0,0.2328746716181437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,128,1,float16,float16,0,0.041840001940727234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,0,1,float16,float16,0,0.14405866463979086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,0,1,fp8,fp8,0,0.2126506765683492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,128,1,float16,fp8,0,0.0436106671889623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,128,1,fp8,fp8,0,0.041637333730856575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,0,1,float16,fp8,0,0.14447466532389322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,0,1,fp8,fp8,0,0.1359999974568685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,128,1,float16,float16,0,0.04180799921353658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,0,1,float16,float16,0,0.14334400494893393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,128,1,float16,fp8,0,0.039834665755430855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,128,1,fp8,fp8,0,0.03975466638803482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,0,1,float16,fp8,0,0.1448906660079956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,0,1,fp8,fp8,0,0.1332480013370514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,128,1,float16,float16,0,0.03194666653871536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,0,1,float16,float16,0,0.13179199894269308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,128,1,float16,fp8,0,0.03329066683848699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,128,1,fp8,fp8,0,0.03133333226044973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,0,1,float16,fp8,0,0.1318933367729187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,128,1,float16,float16,0,0.03162133445342382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,0,1,fp8,fp8,0,0.1237440009911855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,128,1,float16,fp8,0,0.03162133445342382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,0,1,float16,float16,0,0.13177067041397095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,128,1,fp8,fp8,0,0.031290667752424874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,0,1,float16,fp8,0,0.13371200362841287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,0,1,fp8,fp8,0,0.12563199798266092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,128,1,float16,float16,0,0.12774399916330972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,128,1,float16,fp8,0,0.13061867157618204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,128,1,fp8,fp8,0,0.12782933314641318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,0,1,float16,float16,0,0.4560106595357259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,128,1,float16,float16,0,0.0767680009206136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,0,1,float16,fp8,0,0.45611735184987384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,0,1,fp8,fp8,0,0.42291200160980225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,0,1,float16,float16,0,0.26735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,128,1,float16,fp8,0,0.0784693310658137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,128,1,fp8,fp8,0,0.07870933413505554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,128,1,float16,float16,0,0.07251733541488647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,0,1,fp8,fp8,0,0.25092800458272296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,0,1,float16,fp8,0,0.2695733308792114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,128,1,float16,fp8,0,0.0743999977906545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,128,1,fp8,fp8,0,0.06952000161012013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,0,1,float16,float16,0,0.26517866055170697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,128,1,float16,float16,0,0.04572799801826477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,0,1,fp8,fp8,0,0.24307199319203696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,0,1,float16,fp8,0,0.26548800865809125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,0,1,float16,float16,0,0.17100799083709717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,128,1,float16,fp8,0,0.04823466638724009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,128,1,fp8,fp8,0,0.04601066807905833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,0,1,float16,fp8,0,0.17254932721455893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,128,1,float16,float16,0,0.043893332282702126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,0,1,fp8,fp8,0,0.15964800119400024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,128,1,float16,fp8,0,0.04580266773700714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,0,1,float16,float16,0,0.16890132427215576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,128,1,fp8,fp8,0,0.043925335009892784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,128,1,float16,float16,0,0.031541332602500916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,0,1,float16,fp8,0,0.17066667477289835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,0,1,fp8,fp8,0,0.1554026703039805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,0,1,float16,float16,0,0.11186666289965312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,128,1,float16,fp8,0,0.03142400085926056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,128,1,fp8,fp8,0,0.031504000226656594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,128,1,float16,float16,0,0.03128000100453695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,0,1,float16,fp8,0,0.11150933305422465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,0,1,fp8,fp8,0,0.10640533765157063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,128,1,float16,fp8,0,0.03178133318821589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,128,1,fp8,fp8,0,0.03146666785081228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,0,1,float16,float16,0,0.11346667011578877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,128,1,float16,float16,0,0.029311999678611755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,0,1,float16,fp8,0,0.11203733086585999
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,0,1,fp8,fp8,0,0.10529067118962605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,128,1,float16,fp8,0,0.031317333380381264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,0,1,float16,float16,0,0.10972266395886739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,128,1,fp8,fp8,0,0.029343999922275543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,0,1,float16,fp8,0,0.10909866293271382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,0,1,fp8,fp8,0,0.10309333602587382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,0,1,float16,float16,0,0.10937600334485371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,128,1,float16,float16,0,0.029578665892283123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,128,1,fp8,fp8,0,0.029178666571776073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,128,1,float16,fp8,0,0.030031998952229817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,0,1,float16,fp8,0,0.10924266775449117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,0,1,fp8,fp8,0,0.10337066650390625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,128,1,float16,float16,0,0.10128532846768697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,128,1,float16,fp8,0,0.10429867108662923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,128,1,fp8,fp8,0,0.10076799988746643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,0,1,float16,float16,0,0.29078932603200275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,128,1,float16,float16,0,0.06042666733264923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,0,1,float16,fp8,0,0.2908266584078471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,0,1,fp8,fp8,0,0.2693013350168864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,0,1,float16,float16,0,0.1763733426729838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,128,1,float16,fp8,0,0.06241066753864288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,128,1,fp8,fp8,0,0.06016000111897787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,0,1,float16,fp8,0,0.17579734325408936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,0,1,fp8,fp8,0,0.16547733545303345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,128,1,float16,float16,0,0.05821333328882853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,0,1,float16,float16,0,0.1731520096460978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,128,1,float16,fp8,0,0.058415999015172325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,128,1,fp8,fp8,0,0.05667733152707418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,0,1,float16,fp8,0,0.1731839974721273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,128,1,float16,float16,0,0.041696002086003624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,0,1,float16,float16,0,0.11403200030326843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,0,1,fp8,fp8,0,0.16030933459599814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,128,1,float16,fp8,0,0.04359466830889384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,128,1,fp8,fp8,0,0.03977066775163015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,0,1,float16,fp8,0,0.11430399616559346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,0,1,fp8,fp8,0,0.10531199971834819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,128,1,float16,float16,0,0.04170133173465729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,0,1,float16,float16,0,0.11283733447392781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,128,1,float16,fp8,0,0.040207999447981514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,128,1,fp8,fp8,0,0.0388373335202535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,0,1,float16,fp8,0,0.11382399996121724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,128,1,float16,float16,0,0.028773332635561626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,0,1,fp8,fp8,0,0.10528000195821126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,0,1,float16,float16,0,0.0888426701227824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,128,1,float16,fp8,0,0.02922666569550832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,128,1,fp8,fp8,0,0.0296426663796107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,0,1,float16,fp8,0,0.08894933263460796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,128,1,float16,float16,0,0.029674666623274486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,0,1,fp8,fp8,0,0.08311466872692108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,0,1,float16,float16,0,0.08855467041333516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,128,1,float16,fp8,0,0.029343999922275543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,128,1,fp8,fp8,0,0.027717334528764088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,0,1,float16,fp8,0,0.08881066242853801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,128,1,float16,float16,0,0.027493332823117573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,0,1,fp8,fp8,0,0.08226666847864787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,0,1,float16,float16,0,0.08683199683825175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,128,1,float16,fp8,0,0.02757333219051361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,128,1,fp8,fp8,0,0.025546667476495106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,0,1,float16,fp8,0,0.08681600292523702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,0,1,fp8,fp8,0,0.08063466846942902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,128,1,float16,float16,0,0.027301333844661713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,0,1,float16,float16,0,0.08477866649627686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,128,1,float16,fp8,0,0.027562665442625683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,128,1,fp8,fp8,0,0.02526933451493581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,0,1,float16,fp8,0,0.08498666683832805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,0,1,fp8,fp8,0,0.08057066798210144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,128,1,float16,float16,0,0.13356266419092813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,0,1,float16,float16,0,0.28596800565719604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,128,1,float16,fp8,0,0.1317813297112783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,128,1,fp8,fp8,0,0.12945066889127096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,128,1,float16,float16,0,0.07692799965540568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,0,1,float16,fp8,0,0.2903040051460266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,0,1,fp8,fp8,0,0.26631466547648114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,128,1,float16,fp8,0,0.08017600079377492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,128,1,fp8,fp8,0,0.07994133234024048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,0,1,float16,float16,0,0.16503999630610147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,128,1,float16,float16,0,0.07266666491826375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,0,1,float16,fp8,0,0.16894932587941489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,0,1,fp8,fp8,0,0.15930666526158652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,128,1,float16,fp8,0,0.07504533231258392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,128,1,fp8,fp8,0,0.07234666744867961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,0,1,float16,float16,0,0.16230400403340658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,128,1,float16,float16,0,0.04595733185609182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,0,1,float16,fp8,0,0.16403200229008993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,0,1,fp8,fp8,0,0.1508746643861135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,0,1,float16,float16,0,0.10357333223025005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,128,1,float16,fp8,0,0.04586666822433472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,128,1,fp8,fp8,0,0.04461333155632019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,0,1,float16,fp8,0,0.10539199908574422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,128,1,float16,float16,0,0.0446720023949941
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,0,1,fp8,fp8,0,0.09936533371607463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,128,1,float16,fp8,0,0.04620266457398733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,0,1,float16,float16,0,0.10131733616193135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,128,1,fp8,fp8,0,0.04358933369318644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,128,1,float16,float16,0,0.03002133220434189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,0,1,float16,fp8,0,0.1034346620241801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,0,1,fp8,fp8,0,0.09649599591890971
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,0,1,float16,float16,0,0.06855999926726024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,128,1,float16,fp8,0,0.03120533376932144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,128,1,fp8,fp8,0,0.029813334345817566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,0,1,float16,fp8,0,0.0682773341735204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,128,1,float16,float16,0,0.029557332396507263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,0,1,fp8,fp8,0,0.06442666550477345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,0,1,float16,float16,0,0.06717333197593689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,128,1,float16,fp8,0,0.030106666187445324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,128,1,fp8,fp8,0,0.029637334247430164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,0,1,float16,fp8,0,0.06889066596825917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,0,1,fp8,fp8,0,0.06418133278687795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,128,1,float16,float16,0,0.02720000098148982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,128,1,float16,fp8,0,0.027530667682488758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,0,1,float16,float16,0,0.064410666624705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,128,1,fp8,fp8,0,0.02717333287000656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,0,1,float16,fp8,0,0.06444799900054932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,0,1,fp8,fp8,0,0.06016000111897787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,128,1,float16,float16,0,0.027215999861558277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,128,1,float16,fp8,0,0.02532266577084859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,0,1,float16,float16,0,0.06402666866779327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,128,1,fp8,fp8,0,0.025583999852339428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,0,1,float16,fp8,0,0.06411733229955037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,128,1,float16,float16,0,0.02516266703605652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,0,1,fp8,fp8,0,0.06051200131575266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,0,1,float16,float16,0,0.06453866759936015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,128,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,128,1,fp8,fp8,0,0.024080000817775726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,0,1,float16,fp8,0,0.06247999767462412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,128,1,float16,float16,0,0.02516266703605652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,0,1,fp8,fp8,0,0.06044266621271769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,128,1,float16,fp8,0,0.02499199906984965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,0,1,float16,float16,0,0.06244266529877981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,128,1,fp8,fp8,0,0.025114665428797405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,0,1,fp8,fp8,0,0.0584746648867925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,0,1,float16,fp8,0,0.06251733501752217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,128,1,float16,float16,0,0.10731200377146403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,0,1,float16,float16,0,0.19507733980814615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,128,1,float16,fp8,0,0.1088853379090627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,128,1,fp8,fp8,0,0.1071519951025645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,0,1,float16,fp8,0,0.1941759983698527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,128,1,float16,float16,0,0.0640533318122228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,0,1,fp8,fp8,0,0.18405866622924805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,0,1,float16,float16,0,0.11664533615112305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,128,1,float16,fp8,0,0.06432533264160156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,128,1,fp8,fp8,0,0.06228800117969513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,0,1,float16,fp8,0,0.11758933464686076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,128,1,float16,float16,0,0.06035199761390686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,0,1,fp8,fp8,0,0.11069333553314209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,0,1,float16,float16,0,0.11171733339627583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,128,1,float16,fp8,0,0.062047998110453285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,128,1,fp8,fp8,0,0.0581279993057251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,0,1,float16,fp8,0,0.11309867103894551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,0,1,fp8,fp8,0,0.10501866539319356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,128,1,float16,float16,0,0.04158399999141693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,0,1,float16,float16,0,0.07258133093516032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,128,1,float16,fp8,0,0.04176533222198486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,128,1,fp8,fp8,0,0.039813332259655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,0,1,float16,fp8,0,0.07442666590213776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,0,1,fp8,fp8,0,0.06874133149782817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,128,1,float16,float16,0,0.04030400017897288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,0,1,float16,float16,0,0.07063466807206471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,128,1,float16,fp8,0,0.04079466561476389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,128,1,fp8,fp8,0,0.03888533264398575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,0,1,float16,fp8,0,0.07238399982452393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,0,1,fp8,fp8,0,0.06846933563550313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,128,1,float16,float16,0,0.02956266701221466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,0,1,float16,float16,0,0.05393599967161814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,128,1,float16,fp8,0,0.029680001238981884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,128,1,fp8,fp8,0,0.029301332930723827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,0,1,float16,fp8,0,0.05603733162085215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,128,1,float16,float16,0,0.027493332823117573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,0,1,fp8,fp8,0,0.05249066650867462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,0,1,float16,float16,0,0.0554666668176651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,128,1,float16,fp8,0,0.02941333254178365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,128,1,fp8,fp8,0,0.02624533325433731
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,0,1,float16,fp8,0,0.05605866511662801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,0,1,fp8,fp8,0,0.05225066840648651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,128,1,float16,float16,0,0.02548266698916753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,0,1,float16,float16,0,0.05219733218352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,128,1,float16,fp8,0,0.025285333395004272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,128,1,fp8,fp8,0,0.02497600018978119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,0,1,float16,fp8,0,0.052111998200416565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,0,1,fp8,fp8,0,0.0487360010544459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,128,1,float16,float16,0,0.02531733363866806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,0,1,float16,float16,0,0.051967998345692955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,128,1,float16,fp8,0,0.02359466751416524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,128,1,fp8,fp8,0,0.023984000086784363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,0,1,float16,fp8,0,0.05202666421731313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,0,1,fp8,fp8,0,0.049653331438700356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,128,1,float16,float16,0,0.023397333920001984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,0,1,float16,float16,0,0.0518506666024526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,128,1,float16,fp8,0,0.023445333043734234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,128,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,0,1,float16,fp8,0,0.05215999980767568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,0,1,fp8,fp8,0,0.047728002071380615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,128,1,float16,float16,0,0.023391999304294586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,0,1,float16,float16,0,0.05003199974695841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,128,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,128,1,fp8,fp8,0,0.023285334308942158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,0,1,float16,fp8,0,0.05012799799442291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,0,1,fp8,fp8,0,0.04808533191680908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,128,1,float16,float16,0,0.13201066851615906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,128,1,float16,fp8,0,0.13454399506251016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,0,1,float16,float16,0,0.1983893314997355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,128,1,fp8,fp8,0,0.12652267018953958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,0,1,float16,fp8,0,0.20172266165415445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,0,1,fp8,fp8,0,0.18558400869369507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,128,1,float16,float16,0,0.07665066421031952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,128,1,float16,fp8,0,0.07969066500663757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,0,1,float16,float16,0,0.11400000254313152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,128,1,fp8,fp8,0,0.0786293347676595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,0,1,float16,fp8,0,0.11515733599662781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,0,1,fp8,fp8,0,0.11196266611417134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,128,1,float16,float16,0,0.07257066667079926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,128,1,float16,fp8,0,0.07441066702206929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,0,1,float16,float16,0,0.10773332913716634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,128,1,fp8,fp8,0,0.07083733379840851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,128,1,float16,float16,0,0.0459199994802475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,0,1,float16,fp8,0,0.11007466912269592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,0,1,fp8,fp8,0,0.10337600111961365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,0,1,float16,float16,0,0.07074666519959767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,128,1,float16,fp8,0,0.0460746685663859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,128,1,fp8,fp8,0,0.04574933151404063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,0,1,float16,fp8,0,0.07223466535409291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,128,1,float16,float16,0,0.043706665436426796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,0,1,fp8,fp8,0,0.06844266752401988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,0,1,float16,float16,0,0.06946133573849995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,128,1,float16,fp8,0,0.04364799956480662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,128,1,fp8,fp8,0,0.04372266431649526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,128,1,float16,float16,0,0.02943466603755951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,0,1,float16,float16,0,0.047744000951449074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,0,1,float16,fp8,0,0.07040533423423767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,0,1,fp8,fp8,0,0.06644266843795776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,128,1,float16,fp8,0,0.031504000226656594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,128,1,fp8,fp8,0,0.029722665747006733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,0,1,float16,fp8,0,0.047925333182017006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,0,1,fp8,fp8,0,0.04576000074545542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,128,1,float16,float16,0,0.029648000995318096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,0,1,float16,float16,0,0.046021332343419395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,128,1,float16,fp8,0,0.031397332747777305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,128,1,fp8,fp8,0,0.029616000751654308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,0,1,float16,fp8,0,0.0476800004641215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,128,1,float16,float16,0,0.02536533276240031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,0,1,fp8,fp8,0,0.04388799766699473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,0,1,float16,float16,0,0.04176533222198486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,128,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,128,1,fp8,fp8,0,0.0232640008131663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,0,1,float16,fp8,0,0.04390933116277059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,0,1,fp8,fp8,0,0.03941866755485535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,128,1,float16,float16,0,0.023402666052182514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,128,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,0,1,float16,float16,0,0.04168533285458883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,128,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,0,1,float16,fp8,0,0.04158399999141693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,128,1,float16,float16,0,0.02370133250951767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,0,1,fp8,fp8,0,0.03957866628964742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,0,1,float16,float16,0,0.039818666875362396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,128,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,128,1,fp8,fp8,0,0.022783999641736347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,128,1,float16,float16,0,0.02316266546646754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,0,1,fp8,fp8,0,0.03746666759252548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,0,1,float16,fp8,0,0.039477333426475525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,0,1,float16,float16,0,0.039781334499518074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,128,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,128,1,fp8,fp8,0,0.02317333221435547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,0,1,float16,fp8,0,0.03979733337958654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,128,1,float16,float16,0,0.023178666830062866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,0,1,fp8,fp8,0,0.03774400055408478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,0,1,float16,float16,0,0.03939199944337209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,128,1,float16,fp8,0,0.023050665855407715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,128,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,0,1,float16,fp8,0,0.03994133323431015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,0,1,fp8,fp8,0,0.037685332198937736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,128,1,float16,float16,0,0.021503999829292297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,0,1,float16,float16,0,0.03969600051641464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,128,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,128,1,float16,fp8,0,0.021407999098300934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,0,1,float16,fp8,0,0.03953066716591517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,0,1,fp8,fp8,0,0.03751466671625773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,128,1,float16,float16,0,0.1071626643339793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,128,1,float16,fp8,0,0.11261333028475444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,0,1,float16,float16,0,0.13979199528694153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,128,1,fp8,fp8,0,0.1056106686592102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,0,1,float16,fp8,0,0.14644799629847208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,0,1,fp8,fp8,0,0.13326399525006613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,128,1,float16,float16,0,0.06428800026575725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,128,1,float16,fp8,0,0.06410133341948192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,0,1,float16,float16,0,0.08499200145403545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,128,1,fp8,fp8,0,0.06081066528956095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,0,1,float16,fp8,0,0.08682666222254436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,0,1,fp8,fp8,0,0.08075200021266937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,128,1,float16,float16,0,0.060080001751581825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,0,1,float16,float16,0,0.08228800197442372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,128,1,float16,fp8,0,0.06206400195757548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,128,1,fp8,fp8,0,0.05772800246874491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,0,1,float16,fp8,0,0.08297066887219746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,0,1,fp8,fp8,0,0.07828799883524577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,128,1,float16,float16,0,0.04159466673930486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,0,1,float16,float16,0,0.05388266841570536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,128,1,float16,fp8,0,0.044031997521718345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,128,1,fp8,fp8,0,0.04163199911514918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,0,1,float16,fp8,0,0.05625066657861074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,0,1,fp8,fp8,0,0.05195199946562449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,128,1,float16,float16,0,0.04164266586303711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,0,1,float16,float16,0,0.05406400064627329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,128,1,float16,fp8,0,0.04136000076929728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,128,1,fp8,fp8,0,0.03957866628964742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,0,1,float16,fp8,0,0.05552533268928528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,0,1,fp8,fp8,0,0.05038933455944061
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,128,1,float16,float16,0,0.029546665648619335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,0,1,float16,float16,0,0.03973866750796636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,128,1,float16,fp8,0,0.029301332930723827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,128,1,fp8,fp8,0,0.02720533311367035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,0,1,float16,fp8,0,0.039642666776975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,0,1,fp8,fp8,0,0.03965866565704346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,128,1,float16,float16,0,0.027424000203609467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,0,1,float16,float16,0,0.03865066667397817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,128,1,float16,fp8,0,0.02916266769170761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,128,1,fp8,fp8,0,0.027215999861558277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,0,1,float16,fp8,0,0.03995733211437861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,0,1,fp8,fp8,0,0.03745600084463755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,128,1,float16,float16,0,0.023530667026837666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,0,1,float16,float16,0,0.03549866626660029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,128,1,float16,fp8,0,0.025050667424996693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,128,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,0,1,float16,fp8,0,0.035786665976047516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,0,1,fp8,fp8,0,0.03334933271010717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,128,1,float16,float16,0,0.02370133250951767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,0,1,float16,float16,0,0.03531199942032496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,128,1,float16,fp8,0,0.02535466601451238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,128,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,0,1,float16,fp8,0,0.03541333228349686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,128,1,float16,float16,0,0.023215999205907185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,0,1,float16,float16,0,0.03345066557327906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,128,1,float16,fp8,0,0.021381333470344543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,0,1,fp8,fp8,0,0.034485332667827606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,128,1,fp8,fp8,0,0.021488000949223835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,0,1,float16,fp8,0,0.033402666449546814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,0,1,fp8,fp8,0,0.0317493329445521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,128,1,float16,float16,0,0.023760000864664715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,0,1,float16,float16,0,0.035402665535608925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,128,1,float16,fp8,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,128,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,0,1,float16,fp8,0,0.03446399917205175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,0,1,fp8,fp8,0,0.031290667752424874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,128,1,float16,float16,0,0.021722666919231415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,0,1,float16,float16,0,0.03342399994532267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,128,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,128,1,float16,fp8,0,0.023605334262053173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,0,1,float16,fp8,0,0.03327466547489166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,0,1,fp8,fp8,0,0.031583999594052635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,128,1,float16,float16,0,0.021386665602525074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,128,1,float16,fp8,0,0.021370666722456615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,0,1,float16,float16,0,0.03382399926582972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,128,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,0,1,float16,fp8,0,0.03307733436425527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,0,1,fp8,fp8,0,0.03172266731659571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,128,1,float16,float16,0,0.10975999633471172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,0,1,float16,float16,0,0.1328426698843638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,128,1,float16,fp8,0,0.11020800471305847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,128,1,fp8,fp8,0,0.10609599947929382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,0,1,float16,fp8,0,0.13193066914876303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,128,1,float16,float16,0,0.06473066906134288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,0,1,fp8,fp8,0,0.1264746685822805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,0,1,float16,float16,0,0.07730133334795634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,128,1,float16,fp8,0,0.06531199812889099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,128,1,fp8,fp8,0,0.06512000163396199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,0,1,float16,fp8,0,0.07782933115959167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,0,1,fp8,fp8,0,0.07677866518497467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,128,1,float16,float16,0,0.06217066446940104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,0,1,float16,float16,0,0.0751146674156189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,128,1,float16,fp8,0,0.06275199850400288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,128,1,fp8,fp8,0,0.06050133208433787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,0,1,float16,fp8,0,0.07602133353551228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,0,1,fp8,fp8,0,0.07222400108973186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,128,1,float16,float16,0,0.03980266551176707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,0,1,float16,float16,0,0.04977599779764811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,128,1,float16,fp8,0,0.03955200066169103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,128,1,fp8,fp8,0,0.039994666973749794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,0,1,float16,fp8,0,0.04997866849104563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,0,1,fp8,fp8,0,0.047482664386431374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,128,1,float16,float16,0,0.03995199998219808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,0,1,float16,float16,0,0.0476746658484141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,128,1,float16,fp8,0,0.03984533250331879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,128,1,fp8,fp8,0,0.039706667264302574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,0,1,float16,fp8,0,0.04769066472848257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,0,1,fp8,fp8,0,0.04756266872088114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,128,1,float16,float16,0,0.027647999425729115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,0,1,float16,float16,0,0.03339199970165888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,128,1,float16,fp8,0,0.027221334477265675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,128,1,fp8,fp8,0,0.02741866558790207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,0,1,float16,fp8,0,0.03367999941110611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,0,1,fp8,fp8,0,0.03134933362404505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,128,1,float16,float16,0,0.027237333357334137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,0,1,float16,float16,0,0.03225066761175791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,128,1,float16,fp8,0,0.026165333886941273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,128,1,fp8,fp8,0,0.026191999514897663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,128,1,float16,float16,0,0.02316266546646754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,0,1,fp8,fp8,0,0.03130666663249334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,0,1,float16,fp8,0,0.031471999982992806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,0,1,float16,float16,0,0.029578665892283123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,128,1,float16,fp8,0,0.02311466634273529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,128,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,0,1,float16,fp8,0,0.029631999631722767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,0,1,fp8,fp8,0,0.02937600016593933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,128,1,float16,float16,0,0.023120000958442688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,0,1,float16,float16,0,0.02961066613594691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,128,1,float16,fp8,0,0.023408000667889912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,128,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,0,1,float16,fp8,0,0.02958933264017105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,0,1,fp8,fp8,0,0.027450665831565857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,128,1,float16,float16,0,0.021386665602525074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,0,1,float16,float16,0,0.027210667729377747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,128,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,128,1,fp8,fp8,0,0.02109333376089732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,0,1,float16,fp8,0,0.027237333357334137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,128,1,float16,float16,0,0.02143999934196472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,0,1,fp8,fp8,0,0.02796799937884013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,0,1,float16,float16,0,0.02720000098148982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,128,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,128,1,fp8,fp8,0,0.021615999440352123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,0,1,float16,fp8,0,0.02758399893840154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,0,1,fp8,fp8,0,0.027295999228954315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,0,1,float16,float16,0,0.027248000105222065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,128,1,float16,float16,0,0.021642667551835377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,128,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,128,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,0,1,float16,fp8,0,0.02834133307139079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,0,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,128,1,float16,float16,0,0.0215786670645078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,0,1,float16,float16,0,0.02712533374627431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,128,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,128,1,float16,fp8,0,0.021520001192887623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,0,1,float16,fp8,0,0.027295999228954315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,0,1,fp8,fp8,0,0.02622933437426885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,128,1,float16,float16,0,0.021040000021457672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,128,1,float16,fp8,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,0,1,float16,float16,0,0.027589333554108936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,128,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,0,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,0,1,fp8,fp8,0,0.025600001215934753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,128,1,float16,float16,0,0.021583999196688335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,0,1,float16,float16,0,0.027232001225153606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,128,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,128,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,0,1,float16,fp8,0,0.02716800073782603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,128,1,float16,float16,0,0.10877333084742229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,0,1,float16,float16,0,0.11142399907112122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,128,1,fp8,fp8,0,0.10523733496665955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,128,1,float16,fp8,0,0.11003200213114421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,128,1,float16,float16,0,0.06262933214505513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,0,1,float16,fp8,0,0.1095360020796458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,0,1,float16,float16,0,0.06460799773534139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,0,1,fp8,fp8,0,0.10539199908574422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,128,1,float16,fp8,0,0.0622026671965917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,128,1,fp8,fp8,0,0.062277331948280334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,0,1,float16,fp8,0,0.0639573335647583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,0,1,fp8,fp8,0,0.062405332922935486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,128,1,float16,float16,0,0.06230400005976359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,0,1,float16,float16,0,0.062463998794555664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,128,1,float16,fp8,0,0.06041066845258077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,128,1,fp8,fp8,0,0.05916266640027364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,0,1,float16,fp8,0,0.06217066446940104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,128,1,float16,float16,0,0.039962666730086006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,0,1,fp8,fp8,0,0.06035199761390686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,0,1,float16,float16,0,0.041562666495641075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,128,1,float16,fp8,0,0.03976000100374222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,128,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,0,1,float16,fp8,0,0.039594667653242745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,0,1,fp8,fp8,0,0.039818666875362396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,128,1,float16,float16,0,0.039594667653242745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,0,1,float16,float16,0,0.03990933299064636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,128,1,float16,fp8,0,0.03930133332808813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,128,1,fp8,fp8,0,0.037791999677817024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,0,1,float16,fp8,0,0.03992533435424169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,0,1,fp8,fp8,0,0.0397119993964831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,128,1,float16,float16,0,0.02720000098148982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,0,1,float16,float16,0,0.02717333287000656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,128,1,float16,fp8,0,0.027093333502610523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,128,1,fp8,fp8,0,0.02741333345572154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,0,1,float16,fp8,0,0.027493332823117573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,0,1,fp8,fp8,0,0.02754133443037669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,128,1,float16,float16,0,0.026698666314284008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,0,1,float16,float16,0,0.02756800005833308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,128,1,float16,fp8,0,0.025920001169045765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,128,1,fp8,fp8,0,0.02550400048494339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,0,1,float16,fp8,0,0.027290667096773785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,0,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,128,1,float16,float16,0,0.023402666052182514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,0,1,float16,float16,0,0.023056000471115112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,128,1,fp8,fp8,0,0.023120000958442688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,0,1,float16,fp8,0,0.0242399995525678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,0,1,fp8,fp8,0,0.023077333966890972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,128,1,float16,float16,0,0.02319466571013133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,0,1,float16,float16,0,0.023552000522613525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,128,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,128,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,0,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,0,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,128,1,float16,float16,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,0,1,float16,float16,0,0.02311466634273529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,128,1,float16,fp8,0,0.022831998765468597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,128,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,0,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,0,1,fp8,fp8,0,0.021402666966120403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,128,1,float16,float16,0,0.021242665747801464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,0,1,float16,float16,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,128,1,float16,fp8,0,0.023130667706330616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,128,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,0,1,float16,fp8,0,0.021690666675567627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,128,1,float16,float16,0,0.020997333029905956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,128,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,128,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,0,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,128,1,float16,float16,0,0.021045332153638203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,0,1,float16,float16,0,0.02160533269246419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,128,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,128,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,0,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,128,1,float16,float16,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,128,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,128,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,0,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,0,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,128,1,float16,float16,0,0.019274666905403137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,0,1,float16,float16,0,0.021482666333516438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,128,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,128,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,0,1,float16,fp8,0,0.021727999051411945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,0,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,128,1,float16,float16,0,0.020949333906173706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,0,1,float16,float16,0,0.02143999934196472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,128,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,128,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,0,1,float16,fp8,0,0.021962667504946392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,0,1,fp8,fp8,0,0.02081599955757459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,128,1,float16,float16,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,0,1,float16,float16,0,0.021370666722456615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,128,1,float16,fp8,0,0.020975999534130096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,128,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,0,1,float16,fp8,0,0.02142400046189626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,0,1,fp8,fp8,0,0.021594665944576263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,128,1,float16,float16,0,0.05858666698137919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,0,1,float16,float16,0,0.056186666091283165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,128,1,float16,fp8,0,0.058389330903689064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,128,1,float16,float16,0,0.036570665736993156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,128,1,fp8,fp8,0,0.05710400144259135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,0,1,float16,fp8,0,0.05794133245944977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,0,1,fp8,fp8,0,0.054986665646235146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,128,1,float16,fp8,0,0.037632000943024956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,128,1,fp8,fp8,0,0.037104000647862755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,0,1,float16,float16,0,0.0365226666132609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,0,1,float16,fp8,0,0.035530666510264076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,0,1,fp8,fp8,0,0.03549333413441976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,128,1,float16,float16,0,0.03640533238649368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,0,1,float16,float16,0,0.0354666660229365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,128,1,float16,fp8,0,0.037434667348861694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,128,1,fp8,fp8,0,0.03594133257865906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,0,1,float16,fp8,0,0.035616000493367515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,0,1,fp8,fp8,0,0.03375466664632162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,128,1,float16,float16,0,0.02569066733121872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,0,1,float16,float16,0,0.025173333783944447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,128,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,0,1,float16,fp8,0,0.025392000873883564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,128,1,fp8,fp8,0,0.02550400048494339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,0,1,fp8,fp8,0,0.02537599951028824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,128,1,float16,float16,0,0.025274666647116344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,0,1,float16,float16,0,0.025445332129796345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,128,1,float16,fp8,0,0.025290665527184803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,128,1,fp8,fp8,0,0.02537599951028824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,0,1,fp8,fp8,0,0.025231999655564625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,0,1,float16,fp8,0,0.025216000775496166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,128,1,float16,float16,0,0.021242665747801464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,0,1,float16,float16,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,128,1,float16,fp8,0,0.02128000060717265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,128,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,0,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,0,1,fp8,fp8,0,0.021274665991465252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,128,1,float16,float16,0,0.021018666525681812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,0,1,float16,float16,0,0.020693333198626835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,128,1,float16,fp8,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,128,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,0,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,0,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,128,1,float16,float16,0,0.021221332252025604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,0,1,float16,float16,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,128,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,128,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,128,1,float16,float16,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,0,1,float16,float16,0,0.019253333409627277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,128,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,128,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,0,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,128,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,128,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,0,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,0,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,128,1,float16,float16,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,0,1,float16,float16,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,128,1,float16,fp8,0,0.01964266722400983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,128,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,0,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,0,1,fp8,fp8,0,0.01770666614174843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,128,1,float16,float16,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,0,1,float16,float16,0,0.018288000176350277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,128,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,0,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,128,1,float16,fp8,0,0.01923199991385142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,128,1,float16,float16,0,0.018954666952292126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,128,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,128,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,0,1,float16,fp8,0,0.01952533299724261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,0,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,0,1,float16,float16,0,0.01930133377512296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,128,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,128,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,0,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,0,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,128,1,float16,float16,0,0.018351999421914417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,0,1,float16,float16,0,0.0183999997874101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,128,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,128,1,fp8,fp8,0,0.01889066646496455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,0,1,float16,fp8,0,0.019546666493018467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,128,1,float16,float16,0,0.01899733394384384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,128,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,128,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,0,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,128,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,128,1,float16,fp8,0,0.020037333170572918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,0,1,fp8,fp8,0,0.017573333034912746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,128,1,float16,float16,0,0.03766400118668874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,128,1,float16,fp8,0,0.03770133356253306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,128,1,fp8,fp8,0,0.03691199918588003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,0,1,float16,float16,0,0.039162665605545044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,0,1,float16,fp8,0,0.03809600075085958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,0,1,fp8,fp8,0,0.035599999129772186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,128,1,float16,float16,0,0.027562665442625683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,0,1,float16,float16,0,0.025568000972270966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,128,1,float16,fp8,0,0.027104000250498455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,128,1,fp8,fp8,0,0.027429332335789997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,0,1,float16,fp8,0,0.02532800038655599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,0,1,fp8,fp8,0,0.02701333413521449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,128,1,float16,float16,0,0.02568000058333079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,0,1,float16,float16,0,0.025306666890780132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,128,1,float16,fp8,0,0.025333332518736523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,0,1,float16,fp8,0,0.025472000241279602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,128,1,fp8,fp8,0,0.025466665625572205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,0,1,fp8,fp8,0,0.025402667621771496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,128,1,float16,float16,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,0,1,float16,float16,0,0.019674666225910187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,128,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,128,1,fp8,fp8,0,0.019509332875410717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,0,1,float16,fp8,0,0.01950399950146675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,0,1,fp8,fp8,0,0.01958400011062622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,128,1,float16,float16,0,0.019215999792019527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,128,1,float16,fp8,0,0.019834666202465694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,128,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,0,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,0,1,float16,float16,0,0.017477333545684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,128,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,128,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,0,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,128,1,float16,float16,0,0.017349333812793095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,0,1,float16,float16,0,0.01764800027012825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,128,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,0,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,128,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,128,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,0,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,128,1,float16,float16,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,128,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,128,1,fp8,fp8,0,0.016271999726692837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,0,1,fp8,fp8,0,0.017498667041460674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,128,1,float16,float16,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,128,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,128,1,float16,float16,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,0,1,float16,float16,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,128,1,float16,fp8,0,0.0174346665541331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,128,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,128,1,float16,float16,0,0.017504000415404636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,128,1,fp8,fp8,0,0.016229332735141117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,0,1,fp8,fp8,0,0.015573333948850632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,0,1,float16,float16,0,0.016645333419243496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,128,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,128,1,float16,float16,0,0.01573333392540614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,128,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,0,1,float16,float16,0,0.01602666700879733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,128,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,128,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,128,1,float16,float16,0,0.01684800038735072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,128,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,128,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,128,1,fp8,fp8,0,0.01565333331624667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,0,1,fp8,fp8,0,0.015423999478419622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,128,1,float16,float16,0,0.0315733328461647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,0,1,float16,float16,0,0.03130666663249334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,128,1,float16,fp8,0,0.03121600051720937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,0,1,float16,fp8,0,0.03159466634194056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,128,1,fp8,fp8,0,0.029765332738558452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,0,1,fp8,fp8,0,0.031082667410373688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,128,1,float16,float16,0,0.02128000060717265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,0,1,float16,float16,0,0.023237332701683044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,128,1,float16,fp8,0,0.023386667172114056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,128,1,fp8,fp8,0,0.023056000471115112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,0,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,0,1,float16,fp8,0,0.023813332120577495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,128,1,float16,float16,0,0.02319466571013133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,0,1,float16,float16,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,128,1,float16,fp8,0,0.023391999304294586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,128,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,0,1,float16,fp8,0,0.02330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,128,1,float16,float16,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,0,1,float16,float16,0,0.017583999782800674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,0,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,128,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,128,1,fp8,fp8,0,0.01950399950146675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,0,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,0,1,fp8,fp8,0,0.017866666118303936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,128,1,float16,float16,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,0,1,float16,float16,0,0.017808000246683758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,128,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,0,1,float16,fp8,0,0.0179626668492953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,0,1,float16,float16,0,0.016538667182127636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,128,1,float16,float16,0,0.015360000232855478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,0,1,float16,float16,0,0.015813333292802174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,128,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,128,1,float16,fp8,0,0.017616000026464462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,128,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,128,1,float16,float16,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,0,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,0,1,fp8,fp8,0,0.015552000453074774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,0,1,float16,float16,0,0.015423999478419622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,128,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,128,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,0,1,float16,fp8,0,0.01588800052801768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,128,1,float16,float16,0,0.015082667271296183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,128,1,float16,float16,0,0.016800000021855038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,128,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,128,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,128,1,float16,float16,0,0.014858666807413101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,128,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,128,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,128,1,float16,float16,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,0,1,float16,float16,0,0.01552533358335495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,128,1,float16,float16,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,0,1,float16,float16,0,0.016282666474580765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,0,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,128,1,float16,fp8,0,0.016522667060295742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,128,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,128,1,fp8,fp8,0,0.015541333705186844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,0,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,128,1,float16,float16,0,0.02752533306678136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,0,1,float16,float16,0,0.02770666778087616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,128,1,float16,fp8,0,0.02731200059254964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,128,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,0,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,0,1,fp8,fp8,0,0.02754666656255722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,128,1,float16,float16,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,0,1,float16,float16,0,0.019589333484570186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,128,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,0,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,0,1,fp8,fp8,0,0.01988799994190534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,128,1,float16,float16,0,0.02139200021823247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,128,1,float16,fp8,0,0.019386666516462963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,0,1,float16,float16,0,0.02102400114138921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,128,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,0,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,128,1,float16,float16,0,0.01584533353646596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,0,1,float16,float16,0,0.017423999806245167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,128,1,float16,fp8,0,0.017551999539136887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,128,1,fp8,fp8,0,0.01756799966096878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,128,1,float16,float16,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,128,1,float16,fp8,0,0.016293333222468693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,128,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,0,1,fp8,fp8,0,0.015386667102575302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,128,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,128,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,128,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,128,1,float16,float16,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,128,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,128,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,0,1,float16,fp8,0,0.01578666642308235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,128,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,128,1,float16,fp8,0,0.015392000476519266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,128,1,float16,float16,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,128,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,0,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,128,1,float16,float16,0,0.014975999792416891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,128,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,128,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,128,1,float16,float16,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,128,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,128,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,0,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,128,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,0,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,128,1,float16,float16,0,0.014949332922697067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,0,1,float16,float16,0,0.015370666980743408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,128,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,128,1,float16,float16,0,0.025600001215934753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,0,1,float16,float16,0,0.02518400053183238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,128,1,float16,fp8,0,0.02571733295917511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,128,1,fp8,fp8,0,0.02516266703605652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,0,1,float16,fp8,0,0.02584533393383026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,128,1,float16,float16,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,0,1,fp8,fp8,0,0.02554133286078771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,0,1,float16,float16,0,0.019248000035683315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,128,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,0,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,128,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,0,1,fp8,fp8,0,0.019487999379634857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,128,1,float16,float16,0,0.019152000546455383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,0,1,float16,float16,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,128,1,float16,fp8,0,0.019754666835069656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,0,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,0,1,fp8,fp8,0,0.019424000134070713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,0,1,float16,float16,0,0.015557333827018738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,128,1,float16,float16,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,128,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,0,1,float16,float16,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,128,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,128,1,float16,float16,0,0.015317333241303762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,128,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,0,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,128,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,0,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,0,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,128,1,float16,fp8,0,0.015386667102575302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,128,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,128,1,float16,float16,0,0.01533866673707962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,128,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,128,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,128,1,fp8,fp8,0,0.015813333292802174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,0,1,float16,fp8,0,0.015594666202863058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,0,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,128,1,float16,float16,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,0,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,128,1,fp8,fp8,0,0.016480000068744022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,128,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,128,1,float16,fp8,0,0.015909332782030106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,0,1,float16,float16,0,0.016704000532627106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,128,1,float16,float16,0,0.016037333756685257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,0,1,float16,fp8,0,0.015477333217859268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,0,0.025087999800841015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,0,0.023520000278949738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,0,0.02532266577084859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,64,128,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,0,0.025045332809289295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,64,0,1,fp8,fp8,0,0.02513066679239273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,64,128,1,float16,float16,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,0,0.019141333798567455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,64,128,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,0,0.01950399950146675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,64,128,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,64,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,0,0.01969066634774208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,0,0.01882133384545644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,64,128,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,0,0.019733333339293797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,64,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,64,128,1,float16,fp8,0,0.017722666263580322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,64,128,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,64,0,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,64,128,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,64,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,0,0.01670933390657107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,64,128,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,64,0,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,64,128,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,0,0.015626666446526844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,64,0,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,0,0.015376000354687372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,64,128,1,fp8,fp8,0,0.01635733370979627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,64,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,64,128,1,float16,float16,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,64,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,64,128,1,fp8,fp8,0,0.014767999450365702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,64,0,1,fp8,fp8,0,0.016197333733240765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,64,128,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,64,128,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,0,0.015509333461523056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,64,128,1,fp8,fp8,0,0.015658666690190632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,64,128,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,0,0.014874666929244995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,64,128,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,64,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,64,128,1,fp8,fp8,0,0.0161013330022494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,0,0.015402667224407196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,128,1,float16,float16,0,0.047509332497914634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,128,1,float16,fp8,0,0.050186668833096824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,0,1,float16,float16,0,0.29841599861780804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,128,1,fp8,fp8,0,0.04696533580621084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,0,1,float16,fp8,0,0.3011946678161621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,128,1,float16,float16,0,0.033285332222779594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,0,1,fp8,fp8,0,0.2733760078748067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,0,1,float16,float16,0,0.19855999946594238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,128,1,float16,fp8,0,0.03323733309904734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,128,1,fp8,fp8,0,0.03334933271010717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,128,1,float16,float16,0,0.031354665756225586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,0,1,float16,fp8,0,0.19983466466267905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,0,1,fp8,fp8,0,0.1850666602452596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,128,1,float16,fp8,0,0.030784000953038532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,128,1,fp8,fp8,0,0.02977066735426585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,0,1,float16,float16,0,0.19316800435384116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,128,1,float16,float16,0,0.041893333196640015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,0,1,float16,fp8,0,0.19340799252192178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,0,1,fp8,fp8,0,0.1811306675275167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,128,1,float16,fp8,0,0.04372266431649526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,0,1,float16,float16,0,0.19486399491628012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,128,1,fp8,fp8,0,0.04194133480389913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,128,1,float16,float16,0,0.02958933264017105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,0,1,float16,fp8,0,0.1954560081164042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,0,1,fp8,fp8,0,0.17864533265431723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,0,1,float16,float16,0,0.15037866433461508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,128,1,fp8,fp8,0,0.02945599953333537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,128,1,float16,fp8,0,0.03128000100453695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,128,1,float16,float16,0,0.029146666328112285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,0,1,float16,fp8,0,0.1506666640440623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,0,1,fp8,fp8,0,0.14215999841690063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,128,1,float16,fp8,0,0.02792000025510788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,0,1,float16,float16,0,0.14867732922236124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,128,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,128,1,float16,float16,0,0.03961600114901861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,0,1,float16,fp8,0,0.14890133341153464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,0,1,fp8,fp8,0,0.14033066232999167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,128,1,float16,fp8,0,0.03979199876387914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,128,1,fp8,fp8,0,0.0377866675456365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,0,1,float16,float16,0,0.1441920002301534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,0,1,float16,fp8,0,0.1446293294429779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,128,1,float16,float16,0,0.027701333165168762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,0,1,fp8,fp8,0,0.13381866614023843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,0,1,float16,float16,0,0.13010666767756143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,128,1,float16,fp8,0,0.027471999327341717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,128,1,fp8,fp8,0,0.02738133321205775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,0,1,float16,fp8,0,0.1297653317451477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,128,1,float16,float16,0,0.027274665733178455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,0,1,float16,float16,0,0.12762133280436197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,128,1,float16,fp8,0,0.02762666592995326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,0,1,fp8,fp8,0,0.12219732999801636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,128,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,0,1,float16,fp8,0,0.12575999895731607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,0,1,fp8,fp8,0,0.11910933256149292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,128,1,float16,float16,0,0.046240001916885376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,0,1,float16,float16,0,0.1701493263244629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,128,1,float16,fp8,0,0.04775999983151754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,128,1,fp8,fp8,0,0.04562666515509287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,0,1,float16,fp8,0,0.17165333032608032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,128,1,float16,float16,0,0.030400000512599945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,0,1,float16,float16,0,0.11171733339627583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,0,1,fp8,fp8,0,0.1584213376045227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,128,1,float16,fp8,0,0.031888000667095184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,128,1,fp8,fp8,0,0.029343999922275543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,0,1,float16,fp8,0,0.11148266990979512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,0,1,fp8,fp8,0,0.10412266850471497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,128,1,float16,float16,0,0.027093333502610523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,0,1,float16,float16,0,0.10729066530863444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,128,1,float16,fp8,0,0.02769600103298823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,128,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,0,1,float16,fp8,0,0.10764267047246297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,0,1,fp8,fp8,0,0.1011306643486023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,128,1,float16,float16,0,0.02699200063943863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,0,1,float16,float16,0,0.10523200035095215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,128,1,fp8,fp8,0,0.02518400053183238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,128,1,float16,fp8,0,0.02589333305756251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,0,1,float16,fp8,0,0.10345066587130229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,0,1,fp8,fp8,0,0.09776000181833903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,128,1,float16,float16,0,0.04234133164087931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,128,1,float16,fp8,0,0.04322666426499685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,0,1,float16,float16,0,0.11440533399581909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,128,1,fp8,fp8,0,0.04180799921353658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,0,1,float16,fp8,0,0.11550399661064148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,128,1,float16,float16,0,0.027488000690937042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,0,1,fp8,fp8,0,0.10877333084742229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,0,1,float16,float16,0,0.08807466427485149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,128,1,float16,fp8,0,0.02923733244339625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,128,1,fp8,fp8,0,0.02757333219051361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,0,1,float16,fp8,0,0.08873599767684937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,0,1,fp8,fp8,0,0.08309333523114522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,128,1,float16,float16,0,0.02554133286078771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,128,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,0,1,float16,float16,0,0.08293333152929942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,128,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,0,1,float16,fp8,0,0.08284799754619598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,0,1,fp8,fp8,0,0.07885333398977916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,128,1,float16,float16,0,0.023423999547958374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,0,1,float16,float16,0,0.08267733454704285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,128,1,float16,fp8,0,0.025306666890780132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,128,1,fp8,fp8,0,0.02325333406527837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,0,1,float16,fp8,0,0.08288000027338664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,0,1,fp8,fp8,0,0.0769706666469574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,128,1,float16,float16,0,0.04604800045490265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,0,1,float16,float16,0,0.10462400317192078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,128,1,float16,fp8,0,0.04609066744645437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,128,1,fp8,fp8,0,0.04567466676235199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,0,1,float16,fp8,0,0.10630399982134502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,128,1,float16,float16,0,0.029845332105954487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,0,1,fp8,fp8,0,0.0993226667245229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,0,1,float16,float16,0,0.06866666674613953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,128,1,float16,fp8,0,0.03140799949566523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,128,1,fp8,fp8,0,0.029701332251230877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,0,1,float16,fp8,0,0.07018666466077168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,128,1,float16,float16,0,0.025253333151340485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,0,1,fp8,fp8,0,0.06454400221506755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,0,1,float16,float16,0,0.06345599889755249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,128,1,float16,fp8,0,0.025333332518736523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,128,1,fp8,fp8,0,0.02386133372783661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,0,1,float16,fp8,0,0.06409066418806712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,0,1,fp8,fp8,0,0.05913599828879038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,128,1,float16,float16,0,0.023445333043734234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,0,1,float16,float16,0,0.060506666700045265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,128,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,128,1,fp8,fp8,0,0.023103999594847362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,0,1,float16,fp8,0,0.0601440022389094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,128,1,float16,float16,0,0.023408000667889912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,0,1,fp8,fp8,0,0.05807466804981232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,0,1,float16,float16,0,0.05996266504128774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,128,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,128,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,0,1,float16,fp8,0,0.06021333237489065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,0,1,fp8,fp8,0,0.05606933434804281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,128,1,float16,float16,0,0.04195199906826019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,0,1,float16,float16,0,0.0747519979874293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,128,1,float16,fp8,0,0.04322133461634318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,128,1,fp8,fp8,0,0.04138666639725367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,0,1,float16,fp8,0,0.07599466542402904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,0,1,fp8,fp8,0,0.06878933310508728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,128,1,float16,float16,0,0.02789866675933202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,0,1,float16,float16,0,0.056143999099731445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,128,1,float16,fp8,0,0.028229333460330963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,128,1,fp8,fp8,0,0.027424000203609467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,0,1,float16,fp8,0,0.05614933371543884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,0,1,fp8,fp8,0,0.05378133555253347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,128,1,float16,float16,0,0.02498133232196172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,0,1,float16,float16,0,0.051813334226608276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,128,1,float16,fp8,0,0.02534399926662445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,128,1,fp8,fp8,0,0.02363733450571696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,0,1,float16,fp8,0,0.05189866820971171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,0,1,fp8,fp8,0,0.04987200101216634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,128,1,float16,float16,0,0.021589333812395733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,0,1,float16,float16,0,0.04994133114814758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,128,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,128,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,0,1,float16,fp8,0,0.04993600149949392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,0,1,fp8,fp8,0,0.04608533283074697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,128,1,float16,float16,0,0.023205332458019257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,0,1,float16,float16,0,0.04920533299446106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,128,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,128,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,0,1,float16,fp8,0,0.04975999891757965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,0,1,fp8,fp8,0,0.04606399933497111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,128,1,float16,float16,0,0.04725866516431173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,0,1,float16,float16,0,0.0719946672519048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,128,1,float16,fp8,0,0.048063998421033226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,128,1,fp8,fp8,0,0.046069333950678505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,0,1,float16,fp8,0,0.07270933190981548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,0,1,fp8,fp8,0,0.06937066713968913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,128,1,float16,float16,0,0.031152000029881794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,0,1,float16,float16,0,0.04789866507053375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,128,1,float16,fp8,0,0.03160000095764796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,0,1,float16,fp8,0,0.04810666541258494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,128,1,fp8,fp8,0,0.0323786661028862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,0,1,fp8,fp8,0,0.046037331223487854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,128,1,float16,float16,0,0.023445333043734234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,0,1,float16,float16,0,0.04185600082079569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,128,1,float16,fp8,0,0.02550400048494339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,128,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,0,1,float16,fp8,0,0.0414986660083135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,0,1,fp8,fp8,0,0.03980266551176707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,0,1,float16,float16,0,0.040752001106739044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,128,1,float16,float16,0,0.02345066765944163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,128,1,float16,fp8,0,0.023498666783173878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,128,1,fp8,fp8,0,0.0232640008131663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,0,1,float16,fp8,0,0.039642666776975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,128,1,float16,float16,0,0.023226665953795116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,0,1,fp8,fp8,0,0.03825599948565165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,0,1,float16,float16,0,0.039488000174363456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,128,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,128,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,0,1,float16,fp8,0,0.03987200061480204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,0,1,fp8,fp8,0,0.03760000069936117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,128,1,float16,float16,0,0.02146666745344798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,0,1,float16,float16,0,0.038831998904546104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,128,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,128,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,0,1,float16,fp8,0,0.03756266583998998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,0,1,fp8,fp8,0,0.03585066646337509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,128,1,float16,float16,0,0.041759997606277466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,0,1,float16,float16,0,0.0540533314148585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,128,1,float16,fp8,0,0.04363733530044556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,128,1,fp8,fp8,0,0.04208533465862274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,0,1,float16,fp8,0,0.05529599885145823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,128,1,float16,float16,0,0.027445333699385326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,0,1,fp8,fp8,0,0.05260799825191498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,0,1,float16,float16,0,0.039520000418027244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,128,1,float16,fp8,0,0.02934933453798294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,128,1,fp8,fp8,0,0.028037334481875103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,0,1,float16,fp8,0,0.03994666785001755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,0,1,fp8,fp8,0,0.039503999054431915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,128,1,float16,float16,0,0.025573333104451496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,0,1,float16,float16,0,0.03547733277082443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,128,1,float16,fp8,0,0.025285333395004272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,0,1,float16,fp8,0,0.035904000202814736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,128,1,fp8,fp8,0,0.023520000278949738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,0,1,fp8,fp8,0,0.033626665671666466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,128,1,float16,float16,0,0.023221333821614582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,0,1,float16,float16,0,0.035504000882307686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,128,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,128,1,fp8,fp8,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,0,1,float16,fp8,0,0.03389866650104523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,0,1,fp8,fp8,0,0.03172266731659571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,128,1,float16,float16,0,0.021061333517233532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,0,1,float16,float16,0,0.03198933353026708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,128,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,128,1,fp8,fp8,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,0,1,float16,fp8,0,0.032602667808532715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,0,1,fp8,fp8,0,0.03139200061559677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,128,1,float16,float16,0,0.021274665991465252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,0,1,float16,float16,0,0.031680000325044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,128,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,128,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,0,1,float16,fp8,0,0.033376000821590424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,0,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,128,1,float16,float16,0,0.03938133269548416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,128,1,float16,fp8,0,0.03994666785001755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,0,1,float16,float16,0,0.0496373325586319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,128,1,fp8,fp8,0,0.03781333317359289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,0,1,float16,fp8,0,0.04980266590913137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,0,1,fp8,fp8,0,0.04566933214664459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,128,1,float16,float16,0,0.026181332767009735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,0,1,float16,float16,0,0.03342933456103007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,128,1,float16,fp8,0,0.027082666754722595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,128,1,fp8,fp8,0,0.027232001225153606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,0,1,float16,fp8,0,0.03372266640265783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,0,1,fp8,fp8,0,0.03254399945338567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,128,1,float16,float16,0,0.023472001155217487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,0,1,float16,float16,0,0.029301332930723827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,128,1,float16,fp8,0,0.023503998915354412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,128,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,0,1,float16,fp8,0,0.02962133288383484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,0,1,fp8,fp8,0,0.029130667448043823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,128,1,float16,float16,0,0.021456000705560047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,0,1,float16,float16,0,0.027290667096773785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,128,1,float16,fp8,0,0.022976001103719074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,128,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,0,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,0,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,128,1,float16,float16,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,0,1,float16,float16,0,0.02651199946800868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,128,1,float16,fp8,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,128,1,fp8,fp8,0,0.01947733387351036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,0,1,fp8,fp8,0,0.025466665625572205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,128,1,float16,float16,0,0.019519999623298645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,0,1,float16,float16,0,0.027024000883102417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,128,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,128,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,0,1,float16,fp8,0,0.027376001079877216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,0,1,fp8,fp8,0,0.02555199960867564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,128,1,float16,float16,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,0,1,float16,float16,0,0.027237333357334137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,128,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,128,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,0,1,float16,fp8,0,0.027221334477265675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,0,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,128,1,float16,float16,0,0.03972800076007843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,0,1,float16,float16,0,0.04167466859022776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,128,1,float16,fp8,0,0.04001066585381826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,0,1,float16,fp8,0,0.04190933207670847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,0,1,fp8,fp8,0,0.03950933367013931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,128,1,float16,float16,0,0.026522666215896606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,128,1,fp8,fp8,0,0.03853866706291834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,0,1,float16,float16,0,0.027653334041436512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,128,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,128,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,0,1,fp8,fp8,0,0.02752000093460083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,128,1,float16,float16,0,0.023376000424226124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,0,1,float16,fp8,0,0.027786667148272198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,0,1,float16,float16,0,0.02442666639884313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,128,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,128,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,0,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,0,1,float16,fp8,0,0.023914667467276256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,128,1,float16,float16,0,0.02145066608985265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,0,1,float16,float16,0,0.021407999098300934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,128,1,float16,fp8,0,0.023647998770078022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,128,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,0,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,0,1,fp8,fp8,0,0.021541332205136616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,128,1,float16,float16,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,0,1,float16,float16,0,0.021290667355060577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,128,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,0,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,128,1,float16,fp8,0,0.021733333667119343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,0,1,fp8,fp8,0,0.02107200026512146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,128,1,float16,float16,0,0.021242665747801464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,0,1,float16,float16,0,0.021359999974568684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,128,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,128,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,0,1,float16,fp8,0,0.021290667355060577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,0,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,128,1,float16,float16,0,0.02144533395767212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,0,1,float16,float16,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,128,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,128,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,0,1,float16,fp8,0,0.021583999196688335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,0,1,fp8,fp8,0,0.019434666881958645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,128,1,float16,float16,0,0.019509332875410717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,0,1,float16,float16,0,0.021317332983016968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,128,1,float16,fp8,0,0.020997333029905956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,128,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,0,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,128,1,float16,float16,0,0.02513066679239273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,0,1,float16,float16,0,0.025253333151340485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,128,1,float16,fp8,0,0.025418666501839954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,128,1,fp8,fp8,0,0.025231999655564625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,0,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,0,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,128,1,float16,float16,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,0,1,float16,float16,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,128,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,128,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,0,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,0,1,fp8,fp8,0,0.021509334444999695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,128,1,float16,float16,0,0.019632000476121902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,0,1,float16,float16,0,0.021551998953024547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,128,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,128,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,0,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,128,1,float16,float16,0,0.020917333662509918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,128,1,float16,fp8,0,0.019600000232458115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,128,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,0,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,0,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,128,1,float16,float16,0,0.019141333798567455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,0,1,float16,float16,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,128,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,128,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,0,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,128,1,float16,float16,0,0.018719999740521114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,0,1,float16,float16,0,0.018922666708628338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,128,1,float16,fp8,0,0.019434666881958645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,128,1,fp8,fp8,0,0.017551999539136887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,0,1,float16,fp8,0,0.01964266722400983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,0,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,128,1,float16,float16,0,0.019120000302791595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,0,1,float16,float16,0,0.01924266666173935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,128,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,128,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,128,1,float16,float16,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,0,1,float16,float16,0,0.017680000513792038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,128,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,128,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,0,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,128,1,float16,float16,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,0,1,float16,float16,0,0.01947733387351036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,128,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,128,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,0,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,128,1,float16,float16,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,128,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,128,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,0,1,float16,float16,0,0.01682666689157486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,128,1,float16,float16,0,0.01643199970324834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,0,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,128,1,float16,float16,0,0.027104000250498455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,128,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,128,1,float16,float16,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,0,1,float16,float16,0,0.015461333096027374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,0,1,float16,fp8,0,0.018325333793958027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,0,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,0,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,128,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,128,1,fp8,fp8,0,0.01534933348496755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,0,1,fp8,fp8,0,0.016783999900023144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,0,1,float16,float16,0,0.018079999834299088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,128,1,float16,fp8,0,0.0189280000825723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,128,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,0,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,128,1,float16,float16,0,0.016858667135238647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,0,1,float16,float16,0,0.015674666812022526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,128,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,128,1,float16,float16,0,0.017535999417304993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,0,1,float16,fp8,0,0.017525333911180496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,128,1,float16,float16,0,0.015125333021084467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,128,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,0,1,float16,fp8,0,0.016538667182127636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,0,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,128,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,0,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,128,1,float16,float16,0,0.01685333376129468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,128,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,128,1,fp8,fp8,0,0.018698666244745255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,128,1,float16,float16,0,0.016016000260909397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,128,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,128,1,float16,float16,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,128,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,0,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,128,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,128,1,float16,fp8,0,0.015728000551462173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,128,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,128,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,0,1,float16,fp8,0,0.01551466683546702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,0,1,fp8,fp8,0,0.015557333827018738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,128,1,float16,float16,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,128,1,fp8,fp8,0,0.016773333152135212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,128,1,float16,float16,0,0.016741332908471424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,0,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,0,1,float16,float16,0,0.016693333784739178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,128,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,128,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,128,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,128,1,float16,float16,0,0.015034666905800501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,0,1,float16,float16,0,0.016469333320856094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,128,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,128,1,fp8,fp8,0,0.016000000139077503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,128,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,128,1,float16,fp8,0,0.01553600033124288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,0,1,fp8,fp8,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,128,1,float16,float16,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,0,1,float16,float16,0,0.016735999534527462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,128,1,float16,float16,0,0.014981333166360855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,128,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,0,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,128,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,128,1,float16,float16,0,0.015568000574906668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,0,1,float16,float16,0,0.016063999384641647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,128,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,128,1,float16,fp8,0,0.015520000209410986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,0,1,float16,fp8,0,0.015589332828919092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,128,1,float16,float16,0,0.016623999923467636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,128,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,0,1,fp8,fp8,0,0.015834666788578033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,0,0.017498667041460674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,64,128,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,64,128,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,64,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,64,128,1,float16,float16,0,0.015034666905800501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,64,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,0,0.016309333344300587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,64,128,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,64,128,1,float16,float16,0,0.014837333311637243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,64,0,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,0,0.015578666081031164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,64,128,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,64,128,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,64,0,1,fp8,fp8,0,0.015743999431530636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,64,128,1,float16,float16,0,0.01471466695268949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,0,0.015957333147525787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,64,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,64,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,64,0,1,fp8,fp8,0,0.01581866666674614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,64,128,1,float16,fp8,0,0.01588800052801768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,0,0.016399999459584553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,0,0.01568000018596649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,64,128,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,64,128,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,0,0.015477333217859268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,64,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,128,0,1,fp8,fp8,0,38.48961639404297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,128,0,1,float16,fp8,0,63.61268615722656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,128,0,1,float16,float16,0,64.94424438476562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,128,0,1,float16,float16,0,66.32324727376302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,128,0,1,float16,fp8,0,60.00094095865885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,128,0,1,fp8,fp8,0,39.84081013997396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,128,0,1,float16,float16,0,66.45447285970052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,128,0,1,float16,float16,0,33.26660919189453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,128,0,1,fp8,fp8,0,38.51261901855469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,128,0,1,float16,fp8,0,66.87821451822917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,128,0,1,float16,fp8,0,33.4378662109375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,128,0,1,float16,float16,0,30.279685974121094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,128,0,1,float16,fp8,0,34.202649434407554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,128,0,1,fp8,fp8,0,19.535179138183594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,128,0,1,float16,float16,0,30.509872436523438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,128,0,1,float16,fp8,0,33.49553680419922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,128,0,1,fp8,fp8,0,19.55353546142578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,128,0,1,fp8,fp8,0,19.96226628621419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,128,0,1,float16,float16,0,30.810880025227863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,128,0,1,float16,fp8,0,33.77759043375651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,128,0,1,float16,float16,0,17.276922861735027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,128,0,1,float16,fp8,0,15.99167505900065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,128,0,1,fp8,fp8,0,11.214132944742838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,128,0,1,float16,float16,0,16.18548838297526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,128,0,1,float16,fp8,0,15.067184448242188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,128,0,1,fp8,fp8,0,10.189711888631185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,128,0,1,fp8,fp8,0,10.426186879475912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,128,0,1,float16,float16,0,15.954725901285807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,128,0,1,float16,fp8,0,15.740272521972656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,128,0,1,fp8,fp8,0,9.606986363728842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,128,0,1,float16,float16,0,16.926069895426433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,128,0,1,float16,fp8,0,15.89199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,128,0,1,float16,float16,0,8.235482533772787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,128,0,1,fp8,fp8,0,5.512655893961589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,128,0,1,float16,fp8,0,9.474506378173828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,128,0,1,float16,float16,0,8.127861022949219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,128,0,1,fp8,fp8,0,5.113631884256999
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,128,0,1,float16,fp8,0,8.399514516194662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,128,0,1,float16,float16,0,8.139978408813477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,128,0,1,fp8,fp8,0,5.230949401855469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,128,0,1,float16,fp8,0,8.159184137980143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,128,0,1,float16,float16,0,7.9325815836588545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,128,0,1,fp8,fp8,0,5.192432085673015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,128,0,1,float16,fp8,0,8.337850570678711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,128,0,1,fp8,fp8,0,23.46424611409505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,128,0,1,float16,float16,0,35.891578674316406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,128,0,1,float16,fp8,0,40.33049011230469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,128,0,1,float16,float16,0,38.23362731933594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,128,0,1,fp8,fp8,0,25.123209635416668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,128,0,1,float16,fp8,0,35.713236490885414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,128,0,1,fp8,fp8,0,20.59425099690755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,128,0,1,float16,float16,0,37.761889139811196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,128,0,1,float16,fp8,0,40.08531697591146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,128,0,1,float16,float16,0,19.81765365600586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,128,0,1,fp8,fp8,0,13.252698262532553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,128,0,1,float16,fp8,0,20.327322642008465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,128,0,1,float16,float16,0,16.794512430826824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,128,0,1,fp8,fp8,0,11.923482259114584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,128,0,1,float16,fp8,0,20.041243235270183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,128,0,1,float16,float16,0,18.21089045206706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,128,0,1,fp8,fp8,0,12.21280034383138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,128,0,1,float16,fp8,0,19.859349568684895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,128,0,1,fp8,fp8,0,11.541200002034506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,128,0,1,float16,float16,0,18.350757598876953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,128,0,1,float16,fp8,0,19.80193583170573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,128,0,1,float16,float16,0,9.943951924641928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,128,0,1,fp8,fp8,0,6.630074818929036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,128,0,1,float16,fp8,0,9.840149561564127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,128,0,1,fp8,fp8,0,5.464218775431315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,128,0,1,float16,float16,0,10.062378565470377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,128,0,1,float16,fp8,0,10.837407430013021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,128,0,1,float16,float16,0,9.125247955322266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,128,0,1,fp8,fp8,0,6.499856313069661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,128,0,1,float16,fp8,0,9.508965174357096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,128,0,1,fp8,fp8,0,5.999754587809245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,128,0,1,float16,float16,0,8.853301366170248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,128,0,1,float16,fp8,0,9.62657610575358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,128,0,1,float16,float16,0,5.229109446207683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,128,0,1,float16,fp8,0,5.006618817647298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,128,0,1,fp8,fp8,0,3.1039625803629556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,128,0,1,float16,float16,0,5.195349375406901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,128,0,1,float16,fp8,0,4.6175893147786455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,128,0,1,fp8,fp8,0,2.912186622619629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,128,0,1,float16,float16,0,4.586832046508789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,128,0,1,fp8,fp8,0,2.8419198989868164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,128,0,1,float16,fp8,0,4.895093282063802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,128,0,1,float16,float16,0,5.361194610595703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,128,0,1,float16,fp8,0,4.360762596130371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,128,0,1,fp8,fp8,0,2.9758612314860025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,128,0,1,fp8,fp8,0,17.185855865478516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,128,0,1,float16,fp8,0,26.150110880533855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,128,0,1,float16,float16,0,27.345675150553387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,128,0,1,float16,float16,0,25.42223358154297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,128,0,1,fp8,fp8,0,15.209381103515625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,128,0,1,float16,fp8,0,27.37579091389974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,128,0,1,float16,float16,0,28.436192830403645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,128,0,1,fp8,fp8,0,16.417157491048176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,128,0,1,float16,float16,0,14.765237172444662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,128,0,1,fp8,fp8,0,8.52890141805013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,128,0,1,float16,fp8,0,25.82952117919922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,128,0,1,float16,fp8,0,15.281962076822916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,128,0,1,float16,float16,0,13.80828857421875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,128,0,1,fp8,fp8,0,7.937509536743164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,128,0,1,float16,fp8,0,12.89520009358724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,128,0,1,fp8,fp8,0,7.942197163899739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,128,0,1,float16,float16,0,14.297332763671875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,128,0,1,float16,fp8,0,13.89398447672526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,128,0,1,float16,float16,0,13.385920206705729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,128,0,1,fp8,fp8,0,8.758122762044271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,128,0,1,float16,float16,0,7.459103902180989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,128,0,1,float16,fp8,0,13.190714518229166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,128,0,1,fp8,fp8,0,4.70308272043864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,128,0,1,float16,fp8,0,6.481818517049153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,128,0,1,float16,float16,0,7.19276237487793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,128,0,1,float16,fp8,0,6.583706537882487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,128,0,1,fp8,fp8,0,4.321663856506348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,128,0,1,float16,float16,0,6.2326399485270185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,128,0,1,fp8,fp8,0,3.993525187174479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,128,0,1,float16,fp8,0,6.884927749633789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,128,0,1,float16,float16,0,7.2485917409261065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,128,0,1,float16,fp8,0,6.5037492116292315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,128,0,1,fp8,fp8,0,4.0461225509643555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,128,0,1,float16,float16,0,3.655328114827474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,128,0,1,float16,fp8,0,3.9959465662638345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,128,0,1,fp8,fp8,0,2.200928052266439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,128,0,1,float16,float16,0,2.7098347345987954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,128,0,1,fp8,fp8,0,2.1027520497639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,128,0,1,float16,fp8,0,3.4008798599243164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,128,0,1,float16,float16,0,2.7512426376342773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,128,0,1,float16,fp8,0,2.99729061126709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,128,0,1,fp8,fp8,0,2.1135733922322593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,128,0,1,float16,float16,0,3.5522985458374023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,128,0,1,float16,fp8,0,2.5569279988606772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,128,0,1,fp8,fp8,0,2.107210636138916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,128,0,1,fp8,fp8,0,21.06167475382487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,128,0,1,float16,float16,0,34.71633656819662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,128,0,1,float16,float16,0,37.32481129964193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,128,0,1,float16,fp8,0,36.98509216308594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,128,0,1,float16,fp8,0,33.77090708414713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,128,0,1,fp8,fp8,0,22.85680643717448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,128,0,1,float16,float16,0,37.791760762532554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,128,0,1,float16,fp8,0,37.161669413248696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,128,0,1,float16,float16,0,19.61294428507487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,128,0,1,fp8,fp8,0,20.853525797526043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,128,0,1,float16,float16,0,16.97583516438802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,128,0,1,float16,fp8,0,20.238255818684895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,128,0,1,float16,fp8,0,17.192485809326172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,128,0,1,fp8,fp8,0,10.847700754801432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,128,0,1,float16,fp8,0,16.458805084228516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,128,0,1,float16,float16,0,18.22380828857422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,128,0,1,fp8,fp8,0,10.914384206136068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,128,0,1,fp8,fp8,0,11.173540751139322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,128,0,1,float16,float16,0,17.106107076009113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,128,0,1,float16,fp8,0,18.643792470296223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,128,0,1,float16,float16,0,9.79309336344401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,128,0,1,float16,fp8,0,9.036725362141928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,128,0,1,fp8,fp8,0,6.613637288411458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,128,0,1,fp8,fp8,0,5.431914647420247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,128,0,1,float16,float16,0,8.854661305745443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,128,0,1,float16,fp8,0,8.558736165364584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,128,0,1,fp8,fp8,0,5.1536054611206055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,128,0,1,float16,float16,0,9.825109481811523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,128,0,1,float16,fp8,0,8.418042500813803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,128,0,1,float16,float16,0,8.71235720316569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,128,0,1,fp8,fp8,0,5.5223038991292315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,128,0,1,float16,fp8,0,9.069413503011068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,128,0,1,float16,float16,0,4.942373275756836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,128,0,1,float16,fp8,0,4.9111785888671875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,128,0,1,fp8,fp8,0,3.052432060241699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,128,0,1,float16,float16,0,3.457045237223307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,128,0,1,float16,fp8,0,4.402000109354655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,128,0,1,fp8,fp8,0,2.774928092956543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,128,0,1,fp8,fp8,0,2.695472081502279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,128,0,1,float16,float16,0,4.299354553222656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,128,0,1,float16,fp8,0,4.502911885579427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,128,0,1,float16,float16,0,5.014106750488281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,128,0,1,float16,fp8,0,4.030197461446126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,128,0,1,fp8,fp8,0,2.6608373324076333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,128,0,1,float16,float16,0,2.0440319379170737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,128,0,1,float16,fp8,0,2.147343953450521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,128,0,1,fp8,fp8,0,1.5485599835713704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,128,0,1,float16,float16,0,1.9288585980733235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,128,0,1,float16,fp8,0,1.6783253351847331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,128,0,1,fp8,fp8,0,1.4812374114990234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,128,0,1,float16,float16,0,2.2247680028279624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,128,0,1,float16,fp8,0,1.6705387433369954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,128,0,1,fp8,fp8,0,1.4796427090962727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,128,0,1,float16,fp8,0,2.007391929626465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,128,0,1,float16,float16,0,2.3016799290974936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,128,0,1,fp8,fp8,0,1.4832266171773274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,128,0,1,fp8,fp8,0,12.482293446858725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,128,0,1,float16,float16,0,20.582378387451172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,128,0,1,float16,fp8,0,20.09817123413086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,128,0,1,float16,float16,0,19.768309275309246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,128,0,1,fp8,fp8,0,14.007253011067709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,128,0,1,float16,fp8,0,20.1930669148763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,128,0,1,fp8,fp8,0,12.878475189208984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,128,0,1,float16,float16,0,19.986773173014324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,128,0,1,float16,fp8,0,21.021546681722004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,128,0,1,float16,float16,0,11.348954518636068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,128,0,1,fp8,fp8,0,7.732389450073242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,128,0,1,float16,fp8,0,11.389019012451172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,128,0,1,fp8,fp8,0,6.346517562866211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,128,0,1,float16,float16,0,10.691839853922525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,128,0,1,float16,fp8,0,10.944683074951172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,128,0,1,float16,float16,0,10.175477345784506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,128,0,1,float16,fp8,0,10.068549474080404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,128,0,1,fp8,fp8,0,7.132906595865886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,128,0,1,fp8,fp8,0,6.992826461791992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,128,0,1,float16,fp8,0,9.530378977457682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,128,0,1,float16,float16,0,10.332730611165365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,128,0,1,float16,float16,0,5.547141393025716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,128,0,1,fp8,fp8,0,3.4613545735677085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,128,0,1,float16,fp8,0,4.321989377339681
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,128,0,1,fp8,fp8,0,3.0805867513020835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,128,0,1,float16,float16,0,4.876490592956543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,128,0,1,float16,fp8,0,4.847237269083659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,128,0,1,float16,float16,0,4.698794682820638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,128,0,1,fp8,fp8,0,3.077887852986654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,128,0,1,float16,fp8,0,5.3008371988932295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,128,0,1,float16,fp8,0,4.898687998453776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,128,0,1,fp8,fp8,0,3.6246719360351562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,128,0,1,float16,float16,0,4.705535888671875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,128,0,1,float16,float16,0,2.490565299987793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,128,0,1,float16,fp8,0,2.6625332832336426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,128,0,1,fp8,fp8,0,1.7805493672688801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,128,0,1,fp8,fp8,0,1.6634772618611653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,128,0,1,float16,float16,0,2.140336036682129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,128,0,1,float16,fp8,0,2.98030948638916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,128,0,1,float16,float16,0,1.9326133728027344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,128,0,1,float16,fp8,0,2.2663626670837402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,128,0,1,fp8,fp8,0,1.6722133954366047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,128,0,1,float16,float16,0,2.075599988301595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,128,0,1,float16,fp8,0,2.17958402633667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,128,0,1,fp8,fp8,0,1.6687946319580078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,128,0,1,float16,float16,0,1.30730136235555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,128,0,1,fp8,fp8,0,1.0111839771270752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,128,0,1,float16,fp8,0,1.341034730275472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,128,0,1,float16,float16,0,1.0738986333211262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,128,0,1,float16,fp8,0,1.0689173539479573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,128,0,1,fp8,fp8,0,0.9550453027089437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,128,0,1,float16,float16,0,1.0946239630381267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,128,0,1,fp8,fp8,0,0.9577493667602539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,128,0,1,float16,fp8,0,1.0730613072713215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,128,0,1,float16,float16,0,1.0786346594492595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,128,0,1,fp8,fp8,0,0.9597012996673584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,128,0,1,float16,fp8,0,1.0788319905598958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,128,0,1,fp8,fp8,0,12.92258071899414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,128,0,1,float16,float16,0,20.003616333007812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,128,0,1,float16,fp8,0,21.473289489746094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,128,0,1,float16,float16,0,19.383472442626953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,128,0,1,fp8,fp8,0,13.531258900960287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,128,0,1,float16,fp8,0,20.14297612508138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,128,0,1,fp8,fp8,0,12.75234603881836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,128,0,1,float16,float16,0,20.084794362386067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,128,0,1,float16,fp8,0,22.31482696533203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,128,0,1,float16,float16,0,10.889291127522787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,128,0,1,float16,fp8,0,11.206597646077475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,128,0,1,float16,float16,0,9.428112030029297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,128,0,1,float16,fp8,0,10.413471857706705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,128,0,1,fp8,fp8,0,6.067056020100911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,128,0,1,float16,float16,0,9.21237309773763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,128,0,1,float16,fp8,0,9.112954457600912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,128,0,1,fp8,fp8,0,6.135253270467122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,128,0,1,float16,float16,0,9.054416020711264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,128,0,1,float16,fp8,0,9.430949529012045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,128,0,1,fp8,fp8,0,6.287274678548177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,128,0,1,float16,fp8,0,5.264767964680989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,128,0,1,float16,float16,0,4.906869252522786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,128,0,1,fp8,fp8,0,3.4608532587687173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,128,0,1,float16,float16,0,4.70799986521403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,128,0,1,fp8,fp8,0,3.2349812189737954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,128,0,1,float16,fp8,0,4.6260372797648115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,128,0,1,float16,float16,0,5.069253285725911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,128,0,1,float16,fp8,0,4.459413210550944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,128,0,1,fp8,fp8,0,3.1646080017089844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,128,0,1,float16,float16,0,5.088720003763835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,128,0,1,float16,fp8,0,5.348410924275716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,128,0,1,fp8,fp8,0,3.118266741434733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,128,0,1,float16,float16,0,2.0372106234232583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,128,0,1,float16,fp8,0,2.022101402282715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,128,0,1,fp8,fp8,0,1.7919786771138508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,128,0,1,float16,float16,0,1.8494720458984375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,128,0,1,float16,fp8,0,1.9183093706766765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,128,0,1,fp8,fp8,0,1.6320373217264812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,128,0,1,float16,float16,0,2.153829256693522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,128,0,1,float16,fp8,0,1.8558133443196614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,128,0,1,fp8,fp8,0,1.6334400177001953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,128,0,1,float16,float16,0,1.8614187240600586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,128,0,1,float16,fp8,0,2.043978691101074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,128,0,1,fp8,fp8,0,1.6468000411987305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,128,0,1,float16,float16,0,1.1654133001963298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,128,0,1,float16,fp8,0,1.0986506938934326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,128,0,1,fp8,fp8,0,0.9792693456013998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,128,0,1,float16,float16,0,1.0196746985117595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,128,0,1,float16,fp8,0,1.0182080268859863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,128,0,1,fp8,fp8,0,0.9048053423563639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,128,0,1,float16,float16,0,1.0297919909159343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,128,0,1,float16,fp8,0,1.0199573040008545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,128,0,1,fp8,fp8,0,0.9769919713338217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,128,0,1,float16,float16,0,1.0208640098571777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,128,0,1,float16,fp8,0,1.0249280134836833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,128,0,1,float16,float16,0,0.6208586692810059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,128,0,1,fp8,fp8,0,0.9090987046559652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,128,0,1,float16,fp8,0,0.6321386496225992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,128,0,1,fp8,fp8,0,0.5773439804712931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,128,0,1,float16,float16,0,0.596016009648641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,128,0,1,float16,fp8,0,0.5994240045547485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,128,0,1,fp8,fp8,0,0.539408008257548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,128,0,1,float16,float16,0,0.5999679962793986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,128,0,1,float16,fp8,0,0.5992159843444824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,128,0,1,fp8,fp8,0,0.5420159896214803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,128,0,1,float16,float16,0,0.5989813407262167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,128,0,1,float16,fp8,0,0.6022026538848877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,128,0,1,fp8,fp8,0,0.5413279930750529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,128,0,1,float16,float16,0,11.074452718098959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,128,0,1,float16,fp8,0,11.692325592041016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,128,0,1,fp8,fp8,0,7.690256118774414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,128,0,1,float16,float16,0,11.473280588785807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,128,0,1,float16,fp8,0,12.701866149902344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,128,0,1,fp8,fp8,0,7.769280115763347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,128,0,1,fp8,fp8,0,8.24936548868815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,128,0,1,float16,float16,0,11.867691040039062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,128,0,1,float16,fp8,0,11.630699157714844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,128,0,1,float16,float16,0,6.997050603230794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,128,0,1,float16,fp8,0,6.393941243489583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,128,0,1,fp8,fp8,0,4.508874575297038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,128,0,1,float16,float16,0,5.665050506591797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,128,0,1,float16,fp8,0,5.524746576944987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,128,0,1,fp8,fp8,0,3.8688907623291016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,128,0,1,float16,float16,0,6.460959752400716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,128,0,1,fp8,fp8,0,4.223552068074544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,128,0,1,float16,fp8,0,5.735520044962565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,128,0,1,fp8,fp8,0,3.9074827829996743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,128,0,1,float16,fp8,0,6.242789586385091
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,128,0,1,float16,float16,0,6.254042943318685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,128,0,1,float16,float16,0,2.978399912516276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,128,0,1,fp8,fp8,0,2.253887971242269
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,128,0,1,float16,fp8,0,3.2261600494384766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,128,0,1,float16,float16,0,2.376373291015625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,128,0,1,fp8,fp8,0,2.005685329437256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,128,0,1,float16,fp8,0,2.280405362447103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,128,0,1,fp8,fp8,0,2.009338696797689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,128,0,1,float16,float16,0,2.6149120330810547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,128,0,1,float16,fp8,0,2.8489707310994468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,128,0,1,float16,float16,0,2.8193705876668296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,128,0,1,float16,fp8,0,2.680720011393229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,128,0,1,fp8,fp8,0,2.1124000549316406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,128,0,1,float16,float16,0,1.3710026741027832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,128,0,1,fp8,fp8,0,1.1880053679148357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,128,0,1,float16,fp8,0,1.5626880327860515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,128,0,1,float16,float16,0,1.2095733483632405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,128,0,1,float16,fp8,0,1.219061295191447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,128,0,1,fp8,fp8,0,1.0707039833068848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,128,0,1,float16,float16,0,1.2113813559214275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,128,0,1,float16,fp8,0,1.2215413252512615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,128,0,1,fp8,fp8,0,1.0726719697316487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,128,0,1,float16,float16,0,1.2194080352783203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,128,0,1,fp8,fp8,0,1.0782132943471272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,128,0,1,float16,fp8,0,1.2340586980183919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,128,0,1,float16,float16,0,0.7204853693644205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,128,0,1,float16,fp8,0,0.7329760392506918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,128,0,1,fp8,fp8,0,0.6912213166554769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,128,0,1,float16,float16,0,0.676800012588501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,128,0,1,float16,fp8,0,0.679365317026774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,128,0,1,fp8,fp8,0,0.6036159992218018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,128,0,1,float16,float16,0,0.6782399813334147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,128,0,1,fp8,fp8,0,0.6056640148162842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,128,0,1,float16,fp8,0,0.6811467011769613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,128,0,1,float16,float16,0,0.6809066931406657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,128,0,1,fp8,fp8,0,0.6075733502705892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,128,0,1,float16,fp8,0,0.6843360265096029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,128,0,1,float16,float16,0,0.4246079921722412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,128,0,1,float16,fp8,0,0.4333599805831909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,128,0,1,fp8,fp8,0,0.39662400881449383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,128,0,1,float16,float16,0,0.40140267213185626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,128,0,1,fp8,fp8,0,0.36767999331156415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,128,0,1,float16,fp8,0,0.4055306514104207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,128,0,1,float16,float16,0,0.40435198942820233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,128,0,1,float16,fp8,0,0.4066773255666097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,128,0,1,fp8,fp8,0,0.36977068583170575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,128,0,1,float16,float16,0,0.4076480070749919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,128,0,1,float16,fp8,0,0.4073280096054077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,128,0,1,fp8,fp8,0,0.37267200152079266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,128,0,1,float16,fp8,0,11.469381968180338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,128,0,1,float16,float16,0,11.589066823323568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,128,0,1,fp8,fp8,0,8.523930867513021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,128,0,1,float16,float16,0,11.765183766682943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,128,0,1,float16,fp8,0,12.163930257161459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,128,0,1,fp8,fp8,0,8.539445241292318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,128,0,1,fp8,fp8,0,8.596431732177734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,128,0,1,float16,float16,0,11.269845326741537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,128,0,1,float16,fp8,0,12.900639851888021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,128,0,1,float16,float16,0,6.870581309000651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,128,0,1,float16,fp8,0,7.187882741292317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,128,0,1,float16,float16,0,5.75761604309082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,128,0,1,float16,fp8,0,5.817930857340495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,128,0,1,fp8,fp8,0,4.225392023722331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,128,0,1,float16,float16,0,5.971173604329427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,128,0,1,float16,fp8,0,5.9410400390625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,128,0,1,fp8,fp8,0,4.243610699971517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,128,0,1,float16,float16,0,5.838506698608398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,128,0,1,float16,fp8,0,5.691962560017903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,128,0,1,float16,float16,0,3.103791872660319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,128,0,1,fp8,fp8,0,4.275871912638347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,128,0,1,float16,fp8,0,2.9161866505940757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,128,0,1,fp8,fp8,0,2.482288042704264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,128,0,1,float16,float16,0,2.491941293080648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,128,0,1,float16,fp8,0,2.5906240145365396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,128,0,1,fp8,fp8,0,2.1586292584737143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,128,0,1,float16,float16,0,2.7920586268107095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,128,0,1,float16,fp8,0,2.6541813214619956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,128,0,1,fp8,fp8,0,2.1645706494649253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,128,0,1,float16,float16,0,2.678079922993978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,128,0,1,float16,fp8,0,2.6866238911946616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,128,0,1,fp8,fp8,0,2.18666140238444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,128,0,1,float16,float16,0,1.6364960670471191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,128,0,1,fp8,fp8,0,1.2917706966400146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,128,0,1,float16,fp8,0,1.4408853848775227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,128,0,1,float16,float16,0,1.2834826310475667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,128,0,1,float16,fp8,0,1.409824053446452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,128,0,1,fp8,fp8,0,1.1292640368143718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,128,0,1,float16,float16,0,1.2892693678538005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,128,0,1,float16,fp8,0,1.3102453549702961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,128,0,1,fp8,fp8,0,1.1421759923299153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,128,0,1,float16,float16,0,1.3180480003356934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,128,0,1,float16,fp8,0,1.2980426947275798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,128,0,1,float16,float16,0,0.7672693729400635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,128,0,1,fp8,fp8,0,1.1717546780904133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,128,0,1,float16,fp8,0,0.7705439726511637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,128,0,1,fp8,fp8,0,0.6925813357035319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,128,0,1,float16,float16,0,0.6898453235626221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,128,0,1,float16,fp8,0,0.6948106288909912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,128,0,1,fp8,fp8,0,0.6129813194274902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,128,0,1,float16,float16,0,0.6937493483225504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,128,0,1,float16,fp8,0,0.6972479820251465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,128,0,1,fp8,fp8,0,0.6148213148117065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,128,0,1,float16,float16,0,0.6966826915740967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,128,0,1,float16,fp8,0,0.7004746596018473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,128,0,1,fp8,fp8,0,0.6200799942016602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,128,0,1,float16,float16,0,0.4243679841359456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,128,0,1,float16,fp8,0,0.43490131696065265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,128,0,1,fp8,fp8,0,0.39339200655619305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,128,0,1,float16,float16,0,0.3934933344523112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,128,0,1,float16,fp8,0,0.394597331682841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,128,0,1,fp8,fp8,0,0.35371200243632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,128,0,1,float16,float16,0,0.3949493169784546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,128,0,1,float16,fp8,0,0.39849066734313965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,128,0,1,fp8,fp8,0,0.35518932342529297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,128,0,1,float16,float16,0,0.3962133328119914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,128,0,1,float16,fp8,0,0.39829333623250324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,128,0,1,fp8,fp8,0,0.35736533006032306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,128,0,1,float16,float16,0,0.25709333022435504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,128,0,1,float16,fp8,0,0.26207999388376874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,128,0,1,fp8,fp8,0,0.2434719999631246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,128,0,1,float16,float16,0,0.23705067237218222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,128,0,1,float16,fp8,0,0.23708800474802652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,128,0,1,fp8,fp8,0,0.22064000368118286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,128,0,1,float16,float16,0,0.23672000567118326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,128,0,1,float16,fp8,0,0.2387359937032064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,128,0,1,fp8,fp8,0,0.2222986618677775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,128,0,1,float16,float16,0,0.24048533042271933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,128,0,1,float16,fp8,0,0.24099733432133993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,128,0,1,fp8,fp8,0,0.22657599051793417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,128,0,1,float16,float16,0,7.305093129475911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,128,0,1,float16,fp8,0,7.064586639404297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,128,0,1,fp8,fp8,0,5.683269500732422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,128,0,1,float16,float16,0,6.932517369588216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,128,0,1,float16,fp8,0,7.378384272257487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,128,0,1,fp8,fp8,0,5.716773351033528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,128,0,1,float16,float16,0,7.841471989949544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,128,0,1,float16,fp8,0,7.3538665771484375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,128,0,1,float16,float16,0,4.226874669392903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,128,0,1,fp8,fp8,0,5.794426600138347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,128,0,1,float16,fp8,0,3.884309450785319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,128,0,1,fp8,fp8,0,3.3221972783406577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,128,0,1,float16,float16,0,3.205653190612793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,128,0,1,float16,fp8,0,3.2728745142618814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,128,0,1,fp8,fp8,0,2.8456907272338867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,128,0,1,float16,float16,0,3.3671414057413735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,128,0,1,float16,fp8,0,3.2584479649861655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,128,0,1,fp8,fp8,0,2.8617547353108725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,128,0,1,float16,float16,0,3.3979787826538086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,128,0,1,float16,fp8,0,3.3208961486816406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,128,0,1,fp8,fp8,0,2.8876425425211587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,128,0,1,float16,float16,0,1.868735949198405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,128,0,1,float16,fp8,0,1.8981919288635254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,128,0,1,fp8,fp8,0,1.702064037322998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,128,0,1,float16,float16,0,1.6535253524780273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,128,0,1,float16,fp8,0,1.662842591603597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,128,0,1,fp8,fp8,0,1.4622079531351726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,128,0,1,float16,float16,0,1.6616640090942383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,128,0,1,float16,fp8,0,1.6687839825948079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,128,0,1,fp8,fp8,0,1.4685920079549153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,128,0,1,float16,float16,0,1.7261919975280762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,128,0,1,float16,fp8,0,1.68505064646403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,128,0,1,fp8,fp8,0,1.4835519790649414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,128,0,1,float16,float16,0,0.9719093640645345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,128,0,1,float16,fp8,0,0.9958506425221761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,128,0,1,fp8,fp8,0,0.8937439918518066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,128,0,1,float16,float16,0,0.8741866747538248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,128,0,1,float16,fp8,0,0.8764426708221436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,128,0,1,fp8,fp8,0,0.7738933563232422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,128,0,1,float16,float16,0,0.8789973258972168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,128,0,1,float16,fp8,0,0.8807946840922037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,128,0,1,fp8,fp8,0,0.7744212945302328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,128,0,1,float16,float16,0,0.880400021870931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,128,0,1,float16,fp8,0,0.8879520098368326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,128,0,1,fp8,fp8,0,0.7811840375264486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,128,0,1,float16,float16,0,0.5225173234939575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,128,0,1,float16,fp8,0,0.5372320016225179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,128,0,1,fp8,fp8,0,0.4825119972229004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,128,0,1,float16,float16,0,0.47783466180165607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,128,0,1,float16,fp8,0,0.4785439968109131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,128,0,1,fp8,fp8,0,0.42588265736897785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,128,0,1,float16,float16,0,0.47702932357788086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,128,0,1,float16,fp8,0,0.47843201955159503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,128,0,1,fp8,fp8,0,0.42588265736897785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,128,0,1,float16,float16,0,0.4796533187230428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,128,0,1,float16,fp8,0,0.4846773147583008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,128,0,1,float16,float16,0,0.29967466990152997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,128,0,1,fp8,fp8,0,0.42954134941101074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,128,0,1,float16,fp8,0,0.30633066097895306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,128,0,1,fp8,fp8,0,0.2794346610705058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,128,0,1,float16,float16,0,0.27084799607594806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,128,0,1,float16,fp8,0,0.2718133330345154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,128,0,1,fp8,fp8,0,0.2498240073521932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,128,0,1,float16,float16,0,0.27314666906992596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,128,0,1,float16,fp8,0,0.27391467491785687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,128,0,1,fp8,fp8,0,0.25065600872039795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,128,0,1,float16,float16,0,0.2743840018908183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,128,0,1,float16,fp8,0,0.27452800671259564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,128,0,1,fp8,fp8,0,0.2506986657778422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,128,0,1,float16,float16,0,0.1859626571337382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,128,0,1,float16,fp8,0,0.18972800175348917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,128,0,1,fp8,fp8,0,0.17511467138926187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,128,0,1,float16,float16,0,0.16823999087015787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,128,0,1,float16,fp8,0,0.16842132806777954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,128,0,1,fp8,fp8,0,0.15660799543062845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,128,0,1,float16,float16,0,0.1687999963760376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,128,0,1,float16,fp8,0,0.16928533713022867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,128,0,1,fp8,fp8,0,0.15642666816711426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,128,0,1,float16,float16,0,0.16862932840983072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,128,0,1,float16,fp8,0,0.16936532656351724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,128,0,1,fp8,fp8,0,0.15657066305478415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,128,0,1,float16,float16,0,7.133808135986328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,128,0,1,float16,fp8,0,7.812559763590495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,128,0,1,fp8,fp8,0,6.298325220743815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,128,0,1,float16,float16,0,7.779024124145508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,128,0,1,float16,fp8,0,7.549530665079753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,128,0,1,fp8,fp8,0,7.24458122253418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,128,0,1,float16,float16,0,7.381274541219075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,128,0,1,fp8,fp8,0,7.047936121622722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,128,0,1,float16,float16,0,4.232042630513509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,128,0,1,float16,fp8,0,8.4617919921875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,128,0,1,float16,fp8,0,4.824853261311849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,128,0,1,float16,float16,0,3.4163945515950522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,128,0,1,float16,fp8,0,3.4111626942952475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,128,0,1,float16,float16,0,3.6529067357381186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,128,0,1,fp8,fp8,0,3.159872055053711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,128,0,1,float16,fp8,0,3.743690808614095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,128,0,1,fp8,fp8,0,3.5972159703572593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,128,0,1,float16,float16,0,3.8942079544067383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,128,0,1,float16,fp8,0,3.8210134506225586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,128,0,1,float16,float16,0,2.03276793162028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,128,0,1,fp8,fp8,0,3.4653971989949546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,128,0,1,float16,fp8,0,2.1431679725646973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,128,0,1,fp8,fp8,0,2.004485289255778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,128,0,1,float16,float16,0,1.731829325358073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,128,0,1,float16,fp8,0,1.74181334177653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,128,0,1,fp8,fp8,0,1.597765286763509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,128,0,1,float16,float16,0,1.748400052388509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,128,0,1,float16,fp8,0,1.7516694068908691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,128,0,1,fp8,fp8,0,1.780677318572998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,128,0,1,float16,float16,0,1.7656906445821126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,128,0,1,float16,fp8,0,1.7933120727539062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,128,0,1,fp8,fp8,0,1.67522128423055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,128,0,1,float16,float16,0,1.037216027577718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,128,0,1,float16,fp8,0,1.0208533604939778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,128,0,1,fp8,fp8,0,1.0954346656799316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,128,0,1,float16,float16,0,0.8858559926350912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,128,0,1,float16,fp8,0,0.8832800388336182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,128,0,1,fp8,fp8,0,0.812671979268392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,128,0,1,float16,float16,0,0.8899306456247965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,128,0,1,float16,fp8,0,0.8973759810129801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,128,0,1,fp8,fp8,0,0.8246666590372721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,128,0,1,float16,float16,0,0.9015999635060629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,128,0,1,float16,fp8,0,0.9048426946004232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,128,0,1,fp8,fp8,0,0.8356266816457113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,128,0,1,float16,float16,0,0.5344213247299194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,128,0,1,float16,fp8,0,0.5227733453114828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,128,0,1,float16,float16,0,0.4594080050786336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,128,0,1,fp8,fp8,0,0.5157546599706014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,128,0,1,float16,fp8,0,0.4593493143717448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,128,0,1,fp8,fp8,0,0.41888535022735596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,128,0,1,float16,float16,0,0.4624319871266683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,128,0,1,float16,fp8,0,0.46272532145182294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,128,0,1,fp8,fp8,0,0.4230080048243205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,128,0,1,float16,float16,0,0.4662880102793376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,128,0,1,float16,fp8,0,0.4711680014928182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,128,0,1,fp8,fp8,0,0.432368000348409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,128,0,1,float16,float16,0,0.28490666548411053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,128,0,1,float16,fp8,0,0.27729066212972003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,128,0,1,fp8,fp8,0,0.2725866635640462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,128,0,1,float16,float16,0,0.24665067593256632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,128,0,1,float16,fp8,0,0.24512000878651938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,128,0,1,fp8,fp8,0,0.22259199619293213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,128,0,1,float16,float16,0,0.2462986707687378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,128,0,1,float16,fp8,0,0.24618132909138998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,128,0,1,fp8,fp8,0,0.2244373361269633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,128,0,1,float16,float16,0,0.24782399336496988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,128,0,1,float16,fp8,0,0.24985599517822266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,128,0,1,fp8,fp8,0,0.22914133469263712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,128,0,1,float16,float16,0,0.1588640014330546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,128,0,1,float16,fp8,0,0.15480533242225647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,128,0,1,fp8,fp8,0,0.15012799700101218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,128,0,1,float16,float16,0,0.13221333424250284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,128,0,1,float16,fp8,0,0.13351999719937643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,128,0,1,fp8,fp8,0,0.12147733569145203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,128,0,1,float16,float16,0,0.13377599914868674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,128,0,1,float16,fp8,0,0.13270399967829385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,128,0,1,fp8,fp8,0,0.12410133083661397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,128,0,1,float16,float16,0,0.13355732957522073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,128,0,1,float16,fp8,0,0.13467199603716531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,128,0,1,fp8,fp8,0,0.12557333707809448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,128,0,1,float16,float16,0,0.09315199653307597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,128,0,1,float16,fp8,0,0.08980799714724223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,128,0,1,fp8,fp8,0,0.09108266234397888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,128,0,1,float16,float16,0,0.07886399825414021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,128,0,1,float16,fp8,0,0.07741333544254303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,128,0,1,fp8,fp8,0,0.070592001080513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,128,0,1,float16,float16,0,0.0783786674340566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,128,0,1,float16,fp8,0,0.07898666461308797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,128,0,1,fp8,fp8,0,0.07020266850789388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,128,0,1,float16,float16,0,0.07914666831493378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,128,0,1,float16,fp8,0,0.07879466811815898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,128,0,1,fp8,fp8,0,0.07098666826883952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,128,0,1,float16,fp8,0,6.026314417521159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,128,0,1,float16,float16,0,6.087530771891276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,128,0,1,fp8,fp8,0,5.573855717976888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,128,0,1,float16,float16,0,6.148378372192383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,128,0,1,float16,fp8,0,6.145285288492839
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,128,0,1,fp8,fp8,0,6.327797571818034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,128,0,1,float16,float16,0,6.267791748046875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,128,0,1,float16,fp8,0,6.248266855875651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,128,0,1,float16,float16,0,3.8906453450520835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,128,0,1,fp8,fp8,0,6.121850967407227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,128,0,1,float16,fp8,0,3.6767040888468423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,128,0,1,float16,float16,0,2.9330666859944663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,128,0,1,float16,fp8,0,2.928959846496582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,128,0,1,fp8,fp8,0,2.7162132263183594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,128,0,1,float16,float16,0,3.0280586878458657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,128,0,1,float16,fp8,0,3.0358826319376626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,128,0,1,fp8,fp8,0,3.162917455037435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,128,0,1,float16,float16,0,3.1173652013142905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,128,0,1,float16,fp8,0,3.141279856363932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,128,0,1,fp8,fp8,0,3.0352160135904946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,128,0,1,float16,float16,0,1.8125386238098145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,128,0,1,float16,fp8,0,1.8131413459777832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,128,0,1,fp8,fp8,0,1.7705225944519043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,128,0,1,float16,float16,0,1.4794559478759766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,128,0,1,float16,fp8,0,1.481621265411377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,128,0,1,fp8,fp8,0,1.3726186752319336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,128,0,1,float16,float16,0,1.4924480120340984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,128,0,1,float16,fp8,0,1.499125321706136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,128,0,1,fp8,fp8,0,1.5661439895629883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,128,0,1,float16,float16,0,1.5174719492594402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,128,0,1,float16,fp8,0,1.5298186937967937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,128,0,1,fp8,fp8,0,1.4264373779296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,128,0,1,float16,float16,0,0.8987786769866943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,128,0,1,float16,fp8,0,0.889695962270101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,128,0,1,fp8,fp8,0,0.8897066911061605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,128,0,1,float16,float16,0,0.7566773096720377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,128,0,1,float16,fp8,0,0.7565706570943197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,128,0,1,fp8,fp8,0,0.6976853211720785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,128,0,1,float16,float16,0,0.7608319918314616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,128,0,1,float16,fp8,0,0.7631306648254395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,128,0,1,fp8,fp8,0,0.7107040087381998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,128,0,1,float16,float16,0,0.7738719781239828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,128,0,1,float16,fp8,0,0.7717973391215006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,128,0,1,fp8,fp8,0,0.7231146494547526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,128,0,1,float16,float16,0,0.46406932671864826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,128,0,1,float16,fp8,0,0.4527946710586548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,128,0,1,fp8,fp8,0,0.45554665724436444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,128,0,1,float16,float16,0,0.3916320006052653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,128,0,1,float16,fp8,0,0.39284801483154297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,128,0,1,fp8,fp8,0,0.3580160140991211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,128,0,1,float16,float16,0,0.39373334248860675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,128,0,1,float16,fp8,0,0.3954133192698161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,128,0,1,fp8,fp8,0,0.36607468128204346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,128,0,1,float16,float16,0,0.40010666847229004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,128,0,1,float16,fp8,0,0.40121599038441974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,128,0,1,fp8,fp8,0,0.3746773401896159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,128,0,1,float16,float16,0,0.24662399291992188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,128,0,1,float16,fp8,0,0.24015466372172037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,128,0,1,fp8,fp8,0,0.24033600091934204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,128,0,1,float16,float16,0,0.20865066846211752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,128,0,1,float16,fp8,0,0.2073813279469808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,128,0,1,fp8,fp8,0,0.19018133481343588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,128,0,1,float16,float16,0,0.20909333229064941
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,128,0,1,float16,fp8,0,0.21022933721542358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,128,0,1,fp8,fp8,0,0.19320533672968546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,128,0,1,float16,float16,0,0.21270400285720825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,128,0,1,float16,fp8,0,0.2124533255894979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,128,0,1,fp8,fp8,0,0.19739200671513876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,128,0,1,float16,float16,0,0.13583999872207642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,128,0,1,float16,fp8,0,0.13217600186665854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,128,0,1,fp8,fp8,0,0.13034133116404215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,128,0,1,float16,float16,0,0.10984533031781514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,128,0,1,float16,fp8,0,0.11202667156855266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,128,0,1,fp8,fp8,0,0.10352533062299092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,128,0,1,float16,float16,0,0.111653337876002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,128,0,1,float16,fp8,0,0.11188266674677531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,128,0,1,fp8,fp8,0,0.1056160032749176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,128,0,1,float16,float16,0,0.11382933457692464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,128,0,1,float16,fp8,0,0.11442133784294128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,128,0,1,fp8,fp8,0,0.10801066954930623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,128,0,1,float16,float16,0,0.0783679982026418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,128,0,1,float16,fp8,0,0.07677866518497467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,128,0,1,fp8,fp8,0,0.07924266656239827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,128,0,1,float16,float16,0,0.06612266600131989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,128,0,1,float16,fp8,0,0.06657599906126659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,128,0,1,fp8,fp8,0,0.0620959997177124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,128,0,1,float16,float16,0,0.06665066878000896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,128,0,1,float16,fp8,0,0.06716266771157582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,128,0,1,fp8,fp8,0,0.06022400160630544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,128,0,1,float16,float16,0,0.06688533226648967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,128,0,1,float16,fp8,0,0.06748799979686737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,128,0,1,fp8,fp8,0,0.0621066689491272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,128,0,1,float16,float16,0,0.04596266647179922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,128,0,1,float16,fp8,0,0.04576000074545542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,128,0,1,fp8,fp8,0,0.04496533175309499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,128,0,1,float16,float16,0,0.04349866509437561
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,128,0,1,float16,fp8,0,0.04586666822433472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,128,0,1,fp8,fp8,0,0.04137066751718521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,128,0,1,float16,float16,0,0.04497066636880239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,128,0,1,float16,fp8,0,0.0459199994802475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,128,0,1,fp8,fp8,0,0.03949866692225138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,128,0,1,float16,float16,0,0.04376000165939331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,128,0,1,float16,fp8,0,0.043935999274253845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,128,0,1,fp8,fp8,0,0.039936001102129616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,128,0,1,float16,float16,0,2.6775840123494468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,128,0,1,float16,fp8,0,2.6885013580322266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,128,0,1,float16,float16,0,2.794528007507324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,128,0,1,fp8,fp8,0,2.488389333089193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,128,0,1,float16,fp8,0,2.783482551574707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,128,0,1,fp8,fp8,0,2.8987038930257163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,128,0,1,float16,float16,0,2.836186726888021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,128,0,1,fp8,fp8,0,2.84281063079834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,128,0,1,float16,fp8,0,2.8629067738850913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,128,0,1,float16,float16,0,1.6618506113688152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,128,0,1,float16,fp8,0,1.624575932820638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,128,0,1,fp8,fp8,0,1.6458932558695476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,128,0,1,float16,float16,0,1.3445439338684082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,128,0,1,float16,fp8,0,1.3490880330403645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,128,0,1,fp8,fp8,0,1.2465333143870037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,128,0,1,float16,float16,0,1.3622986475626628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,128,0,1,float16,fp8,0,1.3660586675008137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,128,0,1,fp8,fp8,0,1.4435200691223145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,128,0,1,float16,float16,0,1.3818559646606445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,128,0,1,float16,fp8,0,1.387935956319173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,128,0,1,fp8,fp8,0,1.3701599438985188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,128,0,1,float16,float16,0,0.8233226935068766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,128,0,1,float16,fp8,0,0.8130026658376058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,128,0,1,fp8,fp8,0,0.8283786773681641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,128,0,1,float16,float16,0,0.6850186983744303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,128,0,1,float16,fp8,0,0.6878080368041992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,128,0,1,fp8,fp8,0,0.6323253313700358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,128,0,1,float16,float16,0,0.6924213568369547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,128,0,1,float16,fp8,0,0.6921652952829996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,128,0,1,fp8,fp8,0,0.6447199980417887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,128,0,1,float16,float16,0,0.6992959976196289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,128,0,1,float16,fp8,0,0.7078133424123129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,128,0,1,fp8,fp8,0,0.6636373202006022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,128,0,1,float16,float16,0,0.42320001125335693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,128,0,1,float16,fp8,0,0.4152853488922119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,128,0,1,fp8,fp8,0,0.4230026801427205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,128,0,1,float16,float16,0,0.3563679854075114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,128,0,1,float16,fp8,0,0.35602132479349774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,128,0,1,fp8,fp8,0,0.3259626626968384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,128,0,1,float16,float16,0,0.3572373390197754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,128,0,1,float16,fp8,0,0.3580640157063802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,128,0,1,fp8,fp8,0,0.3295999964078267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,128,0,1,float16,float16,0,0.36241598924001056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,128,0,1,float16,fp8,0,0.36245866616566974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,128,0,1,fp8,fp8,0,0.34093864758809406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,128,0,1,float16,float16,0,0.22828267018000284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,128,0,1,float16,fp8,0,0.2225173314412435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,128,0,1,fp8,fp8,0,0.22258132696151733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,128,0,1,float16,float16,0,0.1893120010693868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,128,0,1,float16,fp8,0,0.19089066982269287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,128,0,1,fp8,fp8,0,0.17376534144083658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,128,0,1,float16,fp8,0,0.19327465693155924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,128,0,1,float16,float16,0,0.1919040083885193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,128,0,1,fp8,fp8,0,0.17571733395258585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,128,0,1,float16,float16,0,0.19289066394170126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,128,0,1,float16,fp8,0,0.19285867611567178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,128,0,1,fp8,fp8,0,0.18216000000635782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,128,0,1,float16,float16,0,0.12547733386357626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,128,0,1,float16,fp8,0,0.12315199772516887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,128,0,1,fp8,fp8,0,0.12388267119725545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,128,0,1,float16,float16,0,0.10461333394050598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,128,0,1,float16,fp8,0,0.10471999645233154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,128,0,1,float16,float16,0,0.1053013304869334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,128,0,1,fp8,fp8,0,0.09530133008956909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,128,0,1,float16,fp8,0,0.1058026651541392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,128,0,1,fp8,fp8,0,0.09687466422716777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,128,0,1,float16,float16,0,0.10485333204269409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,128,0,1,float16,fp8,0,0.1079306701819102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,128,0,1,fp8,fp8,0,0.09929600358009338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,128,0,1,float16,float16,0,0.07286933561166127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,128,0,1,float16,fp8,0,0.0687253326177597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,128,0,1,fp8,fp8,0,0.0735093355178833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,128,0,1,float16,float16,0,0.05975999931494395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,128,0,1,float16,fp8,0,0.05975999931494395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,128,0,1,fp8,fp8,0,0.05458133419354757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,128,0,1,float16,fp8,0,0.06042666733264923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,128,0,1,float16,float16,0,0.060906668504079185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,128,0,1,fp8,fp8,0,0.054048001766204834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,128,0,1,float16,float16,0,0.060517330964406334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,128,0,1,float16,fp8,0,0.06016000111897787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,128,0,1,fp8,fp8,0,0.05605333546797434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,128,0,1,float16,float16,0,0.04359999795754751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,128,0,1,float16,fp8,0,0.04382933179537455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,128,0,1,fp8,fp8,0,0.04139200101296107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,128,0,1,float16,float16,0,0.039861333866914116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,128,0,1,float16,fp8,0,0.0415786678592364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,128,0,1,fp8,fp8,0,0.037045332292715706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,128,0,1,float16,float16,0,0.041434665520985924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,128,0,1,float16,fp8,0,0.04040000090996424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,128,0,1,fp8,fp8,0,0.0374293327331543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,128,0,1,float16,float16,0,0.0399893323580424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,128,0,1,float16,fp8,0,0.04072533299525579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,128,0,1,fp8,fp8,0,0.03775466730197271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,128,0,1,float16,float16,0,0.027461332579453785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,128,0,1,float16,fp8,0,0.029482667644818623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,128,0,1,fp8,fp8,0,0.027679999669392902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,128,0,1,float16,float16,0,0.027269333600997925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,128,0,1,float16,fp8,0,0.027349332968393963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,128,0,1,fp8,fp8,0,0.025519999365011852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,128,0,1,float16,float16,0,0.027471999327341717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,128,0,1,float16,fp8,0,0.027424000203609467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,128,0,1,fp8,fp8,0,0.026911998788515728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,128,0,1,float16,float16,0,0.02739199995994568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,128,0,1,float16,fp8,0,0.02846933404604594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,128,0,1,fp8,fp8,0,0.02552533398071925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,128,0,1,float16,float16,0,1.461173375447591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,128,0,1,float16,fp8,0,1.4549387296040852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,128,0,1,fp8,fp8,0,1.3616159756978352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,128,0,1,float16,float16,0,1.4756959279378254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,128,0,1,float16,fp8,0,1.4732960065205891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,128,0,1,fp8,fp8,0,1.5491199493408203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,128,0,1,float16,float16,0,1.4935839970906575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,128,0,1,float16,fp8,0,1.5087839762369792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,128,0,1,float16,float16,0,0.8760800361633301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,128,0,1,fp8,fp8,0,1.4162720044453938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,128,0,1,float16,fp8,0,0.8594666322072347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,128,0,1,fp8,fp8,0,0.8783626556396484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,128,0,1,float16,float16,0,0.7372533480326334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,128,0,1,float16,fp8,0,0.7367520332336426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,128,0,1,fp8,fp8,0,0.6923200289408366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,128,0,1,float16,float16,0,0.7458879947662354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,128,0,1,float16,fp8,0,0.7477227052052816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,128,0,1,fp8,fp8,0,0.7075093587239584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,128,0,1,float16,float16,0,0.7529173692067465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,128,0,1,float16,fp8,0,0.7560266653696696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,128,0,1,fp8,fp8,0,0.716480016708374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,128,0,1,float16,float16,0,0.44547732671101886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,128,0,1,float16,fp8,0,0.4389813343683879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,128,0,1,fp8,fp8,0,0.4476693471272786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,128,0,1,float16,float16,0,0.37812801202138263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,128,0,1,float16,fp8,0,0.3806133270263672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,128,0,1,fp8,fp8,0,0.35334400335947674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,128,0,1,float16,float16,0,0.38279998302459717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,128,0,1,float16,fp8,0,0.38283733526865643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,128,0,1,fp8,fp8,0,0.35596267382303876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,128,0,1,float16,float16,0,0.38543466726938885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,128,0,1,float16,fp8,0,0.38812267780303955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,128,0,1,fp8,fp8,0,0.3665013313293457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,128,0,1,float16,float16,0,0.2366559902826945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,128,0,1,float16,fp8,0,0.23107733329137167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,128,0,1,fp8,fp8,0,0.23510400454203287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,128,0,1,float16,float16,0,0.19990400473276773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,128,0,1,float16,fp8,0,0.19947733481725058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,128,0,1,fp8,fp8,0,0.18557333946228027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,128,0,1,float16,float16,0,0.2002293268839518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,128,0,1,float16,fp8,0,0.1999733249346415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,128,0,1,fp8,fp8,0,0.18711467583974203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,128,0,1,float16,float16,0,0.2026080091794332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,128,0,1,float16,fp8,0,0.2036799987157186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,128,0,1,fp8,fp8,0,0.19246933857599893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,128,0,1,float16,float16,0,0.1272586683432261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,128,0,1,float16,fp8,0,0.12425600488980611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,128,0,1,float16,float16,0,0.10781332850456238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,128,0,1,fp8,fp8,0,0.13033599654833475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,128,0,1,float16,fp8,0,0.10764799515406291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,128,0,1,fp8,fp8,0,0.10128532846768697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,128,0,1,float16,float16,0,0.10873066385587056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,128,0,1,float16,fp8,0,0.10880000392595927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,128,0,1,fp8,fp8,0,0.1013759970664978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,128,0,1,float16,float16,0,0.10930666327476501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,128,0,1,float16,fp8,0,0.10946666200955708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,128,0,1,float16,float16,0,0.07262399792671204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,128,0,1,fp8,fp8,0,0.10602666934331258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,128,0,1,float16,fp8,0,0.0709440012772878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,128,0,1,fp8,fp8,0,0.07708266874154408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,128,0,1,float16,float16,0,0.0621973325808843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,128,0,1,float16,fp8,0,0.06274133423964183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,128,0,1,fp8,fp8,0,0.05791999896367391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,128,0,1,float16,float16,0,0.06268266836802165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,128,0,1,float16,fp8,0,0.06307200094064076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,128,0,1,fp8,fp8,0,0.057578667998313904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,128,0,1,float16,float16,0,0.06394133468468984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,128,0,1,float16,fp8,0,0.06317333380381267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,128,0,1,fp8,fp8,0,0.05811200042565664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,128,0,1,float16,fp8,0,0.040549332896868386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,128,0,1,float16,float16,0,0.041989331444104515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,128,0,1,fp8,fp8,0,0.039621333281199135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,128,0,1,float16,float16,0,0.03995199998219808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,128,0,1,float16,fp8,0,0.04082666585842768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,128,0,1,fp8,fp8,0,0.03750933210055033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,128,0,1,float16,float16,0,0.039621333281199135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,128,0,1,float16,fp8,0,0.03976533313592275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,128,0,1,fp8,fp8,0,0.03572800010442734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,128,0,1,float16,float16,0,0.03809600075085958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,128,0,1,float16,fp8,0,0.03756800045569738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,128,0,1,fp8,fp8,0,0.03601066768169403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,128,0,1,float16,float16,0,0.02903466671705246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,128,0,1,float16,fp8,0,0.02938666691382726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,128,0,1,fp8,fp8,0,0.02734400083621343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,128,0,1,float16,float16,0,0.027248000105222065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,128,0,1,float16,fp8,0,0.028725333511829376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,128,0,1,fp8,fp8,0,0.027552001178264618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,128,0,1,float16,float16,0,0.027530667682488758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,128,0,1,float16,fp8,0,0.028746667007605236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,128,0,1,fp8,fp8,0,0.027317332724730175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,128,0,1,float16,float16,0,0.027242665489514668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,128,0,1,float16,fp8,0,0.029365333418051403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,128,0,1,fp8,fp8,0,0.025519999365011852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,128,0,1,float16,float16,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,128,0,1,float16,fp8,0,0.0222080002228419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,128,0,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,128,0,1,float16,float16,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,128,0,1,float16,fp8,0,0.019434666881958645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,128,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,128,0,1,float16,float16,0,0.019461333751678467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,128,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,128,0,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,128,0,1,float16,float16,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,128,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,128,0,1,fp8,fp8,0,0.0195573332409064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,128,0,1,float16,float16,0,0.9255786736806234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,128,0,1,float16,fp8,0,0.9250720342000326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,128,0,1,fp8,fp8,0,0.8854560057322184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,128,0,1,float16,float16,0,0.9316266377766927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,128,0,1,float16,fp8,0,0.9395840167999268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,128,0,1,fp8,fp8,0,0.9141919612884521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,128,0,1,float16,float16,0,0.9440533320109049
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,128,0,1,float16,fp8,0,0.9421226978302002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,128,0,1,fp8,fp8,0,0.9073812961578369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,128,0,1,float16,float16,0,0.542143980662028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,128,0,1,float16,fp8,0,0.5329279899597168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,128,0,1,fp8,fp8,0,0.5437013308207194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,128,0,1,float16,float16,0,0.471834659576416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,128,0,1,float16,fp8,0,0.4708746671676636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,128,0,1,fp8,fp8,0,0.44812798500061035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,128,0,1,float16,float16,0,0.47647468249003094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,128,0,1,float16,fp8,0,0.47567466894785565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,128,0,1,fp8,fp8,0,0.4560106595357259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,128,0,1,float16,float16,0,0.4814879894256592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,128,0,1,float16,fp8,0,0.4811360041300456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,128,0,1,fp8,fp8,0,0.46240532398223877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,128,0,1,float16,float16,0,0.2829866607983907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,128,0,1,float16,fp8,0,0.278330663839976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,128,0,1,fp8,fp8,0,0.2817280093828837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,128,0,1,float16,float16,0,0.2456373373667399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,128,0,1,float16,fp8,0,0.2460106611251831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,128,0,1,fp8,fp8,0,0.2342080076535543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,128,0,1,float16,float16,0,0.2490560015042623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,128,0,1,float16,fp8,0,0.24925865729649863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,128,0,1,fp8,fp8,0,0.23603200912475586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,128,0,1,float16,float16,0,0.24958399931589761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,128,0,1,float16,fp8,0,0.24969067176183066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,128,0,1,fp8,fp8,0,0.23982399702072144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,128,0,1,float16,float16,0,0.15065600474675497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,128,0,1,float16,fp8,0,0.14857066671053568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,128,0,1,fp8,fp8,0,0.1525706648826599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,128,0,1,float16,float16,0,0.13058666388193765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,128,0,1,float16,fp8,0,0.131632000207901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,128,0,1,fp8,fp8,0,0.12395733594894409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,128,0,1,float16,float16,0,0.132314662138621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,128,0,1,float16,fp8,0,0.133242666721344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,128,0,1,fp8,fp8,0,0.12618133425712585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,128,0,1,float16,float16,0,0.13269333044687906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,128,0,1,float16,fp8,0,0.1329759955406189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,128,0,1,fp8,fp8,0,0.12786133090655008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,128,0,1,float16,fp8,0,0.08326933284600575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,128,0,1,float16,float16,0,0.08594666918118794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,128,0,1,fp8,fp8,0,0.08885332942008972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,128,0,1,float16,float16,0,0.07502399881680806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,128,0,1,float16,fp8,0,0.07501866420110066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,128,0,1,fp8,fp8,0,0.0693280001481374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,128,0,1,float16,float16,0,0.07452799876530965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,128,0,1,fp8,fp8,0,0.07043200234572093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,128,0,1,float16,fp8,0,0.07468266785144806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,128,0,1,float16,float16,0,0.07482133309046428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,128,0,1,float16,fp8,0,0.07486400008201599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,128,0,1,fp8,fp8,0,0.06914666791756947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,128,0,1,float16,float16,0,0.048197334011395775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,128,0,1,float16,fp8,0,0.04816000163555145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,128,0,1,fp8,fp8,0,0.04660800099372864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,128,0,1,float16,float16,0,0.04571199913819631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,128,0,1,float16,fp8,0,0.04506133496761322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,128,0,1,fp8,fp8,0,0.042447999119758606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,128,0,1,float16,float16,0,0.045791998505592346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,128,0,1,float16,fp8,0,0.046069333950678505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,128,0,1,fp8,fp8,0,0.042122667034467064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,128,0,1,float16,float16,0,0.04572266836961111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,128,0,1,float16,fp8,0,0.045696000258127846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,128,0,1,fp8,fp8,0,0.04266133407751719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,128,0,1,float16,float16,0,0.03143999973932902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,128,0,1,float16,fp8,0,0.03124266614516576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,128,0,1,fp8,fp8,0,0.031397332747777305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,128,0,1,float16,float16,0,0.02941333254178365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,128,0,1,float16,fp8,0,0.029232000311215717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,128,0,1,fp8,fp8,0,0.029253333806991577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,128,0,1,float16,float16,0,0.029232000311215717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,128,0,1,float16,fp8,0,0.0295413335164388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,128,0,1,fp8,fp8,0,0.02740799884001414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,128,0,1,float16,float16,0,0.029477333029111225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,128,0,1,float16,fp8,0,0.02918400118748347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,128,0,1,fp8,fp8,0,0.02731200059254964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,128,0,1,float16,float16,0,0.023168000082174938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,128,0,1,float16,fp8,0,0.023056000471115112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,128,0,1,fp8,fp8,0,0.023226665953795116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,128,0,1,float16,float16,0,0.02317333221435547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,128,0,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,128,0,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,128,0,1,float16,float16,0,0.023077333966890972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,128,0,1,float16,fp8,0,0.02311466634273529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,128,0,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,128,0,1,float16,float16,0,0.02319466571013133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,128,0,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,128,0,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,128,0,1,float16,float16,0,0.018986667195955913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,128,0,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,128,0,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,128,0,1,float16,float16,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,128,0,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,128,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,128,0,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,128,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,128,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,128,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,128,0,1,float16,float16,0,0.6722826957702637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,128,0,1,float16,fp8,0,0.6708373228708903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,128,0,1,fp8,fp8,0,0.6468160152435303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,128,0,1,float16,float16,0,0.6736106872558594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,128,0,1,float16,fp8,0,0.6761066913604736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,128,0,1,fp8,fp8,0,0.6533226569493612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,128,0,1,float16,float16,0,0.6777706940968832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,128,0,1,float16,fp8,0,0.6793226401011149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,128,0,1,float16,float16,0,0.38307201862335205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,128,0,1,fp8,fp8,0,0.6577706734339396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,128,0,1,float16,fp8,0,0.37768534819285077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,128,0,1,fp8,fp8,0,0.37881600856781006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,128,0,1,float16,float16,0,0.34781332810719806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,128,0,1,float16,fp8,0,0.34584001700083417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,128,0,1,fp8,fp8,0,0.33076266447703045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,128,0,1,float16,float16,0,0.34909331798553467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,128,0,1,float16,fp8,0,0.350325345993042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,128,0,1,fp8,fp8,0,0.33376534779866535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,128,0,1,float16,float16,0,0.3501013517379761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,128,0,1,float16,fp8,0,0.34971733887990314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,128,0,1,fp8,fp8,0,0.3373226722081502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,128,0,1,float16,float16,0,0.1999573310216268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,128,0,1,float16,fp8,0,0.19844265778859457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,128,0,1,fp8,fp8,0,0.19951466719309488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,128,0,1,float16,float16,0,0.18287465969721475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,128,0,1,float16,fp8,0,0.1839946707089742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,128,0,1,fp8,fp8,0,0.1731839974721273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,128,0,1,float16,float16,0,0.18288000424702963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,128,0,1,float16,fp8,0,0.18277867635091147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,128,0,1,fp8,fp8,0,0.17335466543833414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,128,0,1,float16,float16,0,0.18380266427993774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,128,0,1,float16,fp8,0,0.18465065956115723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,128,0,1,fp8,fp8,0,0.1770026683807373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,128,0,1,float16,float16,0,0.10778133074442546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,128,0,1,float16,fp8,0,0.10749866565068562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,128,0,1,fp8,fp8,0,0.11163199941317241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,128,0,1,float16,float16,0,0.09935466448465984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,128,0,1,float16,fp8,0,0.0995146632194519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,128,0,1,fp8,fp8,0,0.09311466415723164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,128,0,1,float16,float16,0,0.09874133268992107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,128,0,1,float16,fp8,0,0.09902399778366089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,128,0,1,fp8,fp8,0,0.09247466921806335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,128,0,1,float16,fp8,0,0.09923199812571208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,128,0,1,float16,float16,0,0.09974933664004008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,128,0,1,fp8,fp8,0,0.0934826632340749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,128,0,1,float16,float16,0,0.059152002135912575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,128,0,1,float16,fp8,0,0.05936533212661743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,128,0,1,fp8,fp8,0,0.05835199852784475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,128,0,1,float16,float16,0,0.05657599866390228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,128,0,1,fp8,fp8,0,0.05434666574001312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,128,0,1,float16,fp8,0,0.05876799921194712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,128,0,1,float16,float16,0,0.05653333167235056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,128,0,1,float16,fp8,0,0.05850133299827576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,128,0,1,fp8,fp8,0,0.056261335810025535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,128,0,1,float16,float16,0,0.05856533348560333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,128,0,1,float16,fp8,0,0.057850668827692665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,128,0,1,fp8,fp8,0,0.05529066423575083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,128,0,1,float16,float16,0,0.03745600084463755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,128,0,1,float16,fp8,0,0.037647999823093414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,128,0,1,fp8,fp8,0,0.036917333801587425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,128,0,1,float16,fp8,0,0.03751466671625773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,128,0,1,float16,float16,0,0.03584533433119456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,128,0,1,fp8,fp8,0,0.03537066777547201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,128,0,1,float16,float16,0,0.0360000009338061
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,128,0,1,float16,fp8,0,0.03722133239110311
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,128,0,1,fp8,fp8,0,0.03521066655715307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,128,0,1,float16,float16,0,0.036288000643253326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,128,0,1,float16,fp8,0,0.03764266769091288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,128,0,1,fp8,fp8,0,0.03532800078392029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,128,0,1,float16,float16,0,0.02552533398071925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,128,0,1,float16,fp8,0,0.02515200028816859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,128,0,1,fp8,fp8,0,0.02552533398071925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,128,0,1,float16,float16,0,0.025285333395004272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,128,0,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,128,0,1,fp8,fp8,0,0.02348266790310542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,128,0,1,float16,float16,0,0.025546667476495106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,128,0,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,128,0,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,128,0,1,float16,float16,0,0.025173333783944447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,128,0,1,float16,fp8,0,0.025519999365011852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,128,0,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,128,0,1,float16,float16,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,128,0,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,128,0,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,128,0,1,float16,float16,0,0.01939733326435089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,128,0,1,float16,fp8,0,0.02059200033545494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,128,0,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,128,0,1,float16,float16,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,128,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,128,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,128,0,1,float16,float16,0,0.021514666577180225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,128,0,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,128,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,128,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,128,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,128,0,1,float16,float16,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,128,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,128,0,1,float16,float16,0,0.5523039897282919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,128,0,1,float16,fp8,0,0.5501546859741211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,128,0,1,fp8,fp8,0,0.5342400074005127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,128,0,1,float16,float16,0,0.5483786662419637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,128,0,1,fp8,fp8,0,0.5363306601842245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,128,0,1,float16,fp8,0,0.5498666763305664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,128,0,1,float16,float16,0,0.55130668481191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,128,0,1,float16,fp8,0,0.550927996635437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,128,0,1,float16,float16,0,0.30108799537022907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,128,0,1,fp8,fp8,0,0.5386026700337728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,128,0,1,float16,fp8,0,0.30057599147160846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,128,0,1,fp8,fp8,0,0.3015786608060201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,128,0,1,float16,float16,0,0.28437334299087524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,128,0,1,float16,fp8,0,0.28511999050776166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,128,0,1,fp8,fp8,0,0.27374933163324994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,128,0,1,float16,float16,0,0.28510934114456177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,128,0,1,float16,fp8,0,0.28617600599924725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,128,0,1,fp8,fp8,0,0.2767519950866699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,128,0,1,float16,float16,0,0.2868906656901042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,128,0,1,fp8,fp8,0,0.27905599276224774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,128,0,1,float16,float16,0,0.15985600153605142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,128,0,1,float16,fp8,0,0.1588586668173472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,128,0,1,fp8,fp8,0,0.16201066970825195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,128,0,1,float16,float16,0,0.15083199739456177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,128,0,1,float16,fp8,0,0.15160000324249268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,128,0,1,fp8,fp8,0,0.14421332875887552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,128,0,1,float16,float16,0,0.15178133050600687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,128,0,1,float16,fp8,0,0.1509813368320465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,128,0,1,fp8,fp8,0,0.14334932963053384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,128,0,1,float16,float16,0,0.15132799744606018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,128,0,1,float16,fp8,0,0.15074132879575095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,128,0,1,float16,fp8,0,0.2854880094528198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,128,0,1,fp8,fp8,0,0.1444960037867228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,128,0,1,float16,fp8,0,0.084906667470932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,128,0,1,fp8,fp8,0,0.08301866551240285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,128,0,1,float16,float16,0,0.08284266789754231
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,128,0,1,float16,fp8,0,0.08321066697438557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,128,0,1,fp8,fp8,0,0.07867200175921123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,128,0,1,float16,float16,0,0.08544533451398213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,128,0,1,float16,float16,0,0.08360000451405843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,128,0,1,float16,fp8,0,0.08290666838486989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,128,0,1,float16,float16,0,0.08345599969228108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,128,0,1,float16,fp8,0,0.08281599978605907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,128,0,1,fp8,fp8,0,0.08083199958006541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,128,0,1,float16,float16,0,0.05017599960168203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,128,0,1,fp8,fp8,0,0.07976533472537994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,128,0,1,float16,fp8,0,0.049829334020614624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,128,0,1,fp8,fp8,0,0.05016533533732096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,128,0,1,float16,float16,0,0.04979733129342397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,128,0,1,float16,fp8,0,0.04816000163555145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,128,0,1,fp8,fp8,0,0.04773866633574168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,128,0,1,float16,float16,0,0.04966400067011515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,128,0,1,float16,fp8,0,0.0481333335240682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,128,0,1,fp8,fp8,0,0.04808000226815542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,128,0,1,float16,float16,0,0.0497920016447703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,128,0,1,float16,fp8,0,0.0480373352766037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,128,0,1,fp8,fp8,0,0.047466665506362915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,128,0,1,float16,float16,0,0.03180799881617228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,128,0,1,float16,fp8,0,0.03333866596221924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,128,0,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,128,0,1,float16,float16,0,0.0314026673634847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,128,0,1,float16,fp8,0,0.033557333052158356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,128,0,1,fp8,fp8,0,0.03141333411137263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,128,0,1,float16,float16,0,0.03325333446264267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,128,0,1,float16,fp8,0,0.03340800106525421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,128,0,1,fp8,fp8,0,0.031173333525657654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,128,0,1,float16,float16,0,0.03316800047953924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,128,0,1,float16,fp8,0,0.031658666829268135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,128,0,1,fp8,fp8,0,0.03140799949566523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,128,0,1,float16,float16,0,0.02329600105683009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,128,0,1,float16,fp8,0,0.023290666441122692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,128,0,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,128,0,1,float16,float16,0,0.0222080002228419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,128,0,1,float16,fp8,0,0.023247999449570973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,128,0,1,fp8,fp8,0,0.02143466720978419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,128,0,1,float16,float16,0,0.022976001103719074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,128,0,1,float16,fp8,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,128,0,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,128,0,1,float16,float16,0,0.021536000072956085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,128,0,1,float16,fp8,0,0.023242667317390442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,128,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,128,0,1,float16,float16,0,0.019589333484570186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,128,0,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,128,0,1,fp8,fp8,0,0.02109333376089732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,128,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,128,0,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,128,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,128,0,1,float16,float16,0,0.020560000091791153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,128,0,1,float16,fp8,0,0.019386666516462963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,128,0,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,128,0,1,float16,float16,0,0.02083733429511388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,128,0,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,128,0,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,128,0,1,float16,float16,0,0.01602666700879733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,128,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,128,0,1,float16,float16,0,0.01674666628241539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,128,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,128,0,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,0,0.4610186815261841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,0,0.46059731642405194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,128,0,1,fp8,fp8,0,0.435205340385437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,0,0.4601813157399495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,0,0.45924798647562665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,128,0,1,fp8,fp8,0,0.4353119929631551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,0,0.46005864938100177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,128,0,1,fp8,fp8,0,0.43539198239644367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,0,0.4601173400878906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,128,0,1,float16,float16,0,0.23769599199295044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,128,0,1,float16,fp8,0,0.23775466283162436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,128,0,1,fp8,fp8,0,0.22643200556437174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,0,0.23657600084940592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,0,0.23691733678181967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,128,0,1,fp8,fp8,0,0.22461867332458496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,0,0.2369813323020935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,0,0.23689067363739014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,128,0,1,fp8,fp8,0,0.2265066703160604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,0,0.23570666710535684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,0,0.23707733551661173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,128,0,1,fp8,fp8,0,0.22447466850280762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,128,0,1,float16,float16,0,0.12587733070055643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,128,0,1,float16,fp8,0,0.12611200412114462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,128,0,1,fp8,fp8,0,0.11979200442632039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,0,0.12571199735005698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,0,0.12533332904179892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,128,0,1,fp8,fp8,0,0.11979732910792033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,0,0.12589866916338602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,0,0.12573867042859396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,128,0,1,fp8,fp8,0,0.12010666728019714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,0,0.12383466958999634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,0,0.12570133805274963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,128,0,1,fp8,fp8,0,0.11965866883595784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,128,0,1,float16,float16,0,0.07039466500282288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,128,0,1,float16,fp8,0,0.07053333520889282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,128,0,1,fp8,fp8,0,0.06861333549022675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,0,0.07054399947325389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,0,0.07032533486684163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,128,0,1,fp8,fp8,0,0.06844800213972728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,0,0.07168533404668172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,0,0.0710506687561671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,128,0,1,fp8,fp8,0,0.06841066479682922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,0,0.07361066838105519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,0,0.07190399865309398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,128,0,1,fp8,fp8,0,0.06818133095900218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,128,0,1,float16,float16,0,0.043477331598599754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,128,0,1,float16,fp8,0,0.04378133515516917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,128,0,1,fp8,fp8,0,0.04185600082079569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,0,0.04358933369318644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,0,0.04362666606903076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,128,0,1,fp8,fp8,0,0.04358399907747904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,0,0.04526400069395701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,0,0.04597333570321401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,128,0,1,fp8,fp8,0,0.043040002385775246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,0,0.043525333205858864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,0,0.04404800136884054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,128,0,1,fp8,fp8,0,0.04171200096607208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,128,0,1,float16,float16,0,0.029535998900731403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,128,0,1,fp8,fp8,0,0.029279999434947968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,0,0.029301332930723827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,128,0,1,float16,fp8,0,0.030495998760064442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,0,0.029498666524887085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,128,0,1,fp8,fp8,0,0.02942933390537898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,0,0.03181333343187968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,0,0.03235200047492981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,128,0,1,fp8,fp8,0,0.029205332199732464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,0,0.03151999910672506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,0,0.030405332644780476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,128,0,1,fp8,fp8,0,0.029215998947620392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,128,0,1,float16,float16,0,0.023024000227451324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,128,0,1,fp8,fp8,0,0.023290666441122692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,128,0,1,float16,fp8,0,0.021722666919231415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,0,0.02310933421055476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,128,0,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,0,0.023247999449570973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,0,0.023557332654794056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,128,0,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,0,0.021231998999913532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,0,0.022426667312781017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,128,0,1,fp8,fp8,0,0.021359999974568684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,128,0,1,float16,float16,0,0.020869334538777668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,128,0,1,float16,fp8,0,0.020954666038354237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,128,0,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,128,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,128,0,1,fp8,fp8,0,0.019466667125622433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,128,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,128,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,128,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,128,0,1,fp8,fp8,0,0.01589866727590561
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,0,0.016762666404247284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,128,0,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,128,0,1,fp8,fp8,0,26.708399454752605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,128,0,1,float16,fp8,0,39.323707580566406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,128,0,1,float16,float16,0,43.59514872233073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,128,0,1,float16,float16,0,43.37508646647135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,128,0,1,fp8,fp8,0,29.470726013183594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,128,0,1,float16,fp8,0,40.49341837565104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,128,0,1,fp8,fp8,0,25.627034505208332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,128,0,1,float16,float16,0,43.12132263183594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,128,0,1,float16,fp8,0,44.020589192708336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,128,0,1,float16,float16,0,40.97484334309896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,128,0,1,float16,float16,0,20.83028284708659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,128,0,1,fp8,fp8,0,29.015914916992188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,128,0,1,float16,fp8,0,43.56694030761719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,128,0,1,fp8,fp8,0,14.175572713216146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,128,0,1,float16,fp8,0,20.2445068359375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,128,0,1,float16,float16,0,21.5961176554362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,128,0,1,float16,fp8,0,20.160255432128906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,128,0,1,fp8,fp8,0,13.934666951497396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,128,0,1,float16,float16,0,20.10320536295573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,128,0,1,fp8,fp8,0,13.248011271158854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,128,0,1,float16,fp8,0,22.51441192626953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,128,0,1,float16,float16,0,20.971691131591797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,128,0,1,fp8,fp8,0,13.416095733642578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,128,0,1,float16,fp8,0,20.132549285888672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,128,0,1,fp8,fp8,0,13.283531188964844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,128,0,1,float16,float16,0,22.465535481770832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,128,0,1,float16,float16,0,10.844192504882812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,128,0,1,float16,fp8,0,22.357716878255207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,128,0,1,fp8,fp8,0,7.450570424397786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,128,0,1,float16,fp8,0,11.512341817220053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,128,0,1,float16,float16,0,10.344677607218424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,128,0,1,float16,fp8,0,11.46393076578776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,128,0,1,fp8,fp8,0,6.976655960083008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,128,0,1,float16,float16,0,10.89520009358724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,128,0,1,float16,fp8,0,10.22820790608724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,128,0,1,fp8,fp8,0,6.718751907348633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,128,0,1,float16,float16,0,11.620379130045572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,128,0,1,fp8,fp8,0,6.301888147989909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,128,0,1,float16,fp8,0,10.688341776529947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,128,0,1,float16,float16,0,11.295637766520182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,128,0,1,fp8,fp8,0,6.452512105305989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,128,0,1,float16,float16,0,6.288880030314128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,128,0,1,float16,fp8,0,11.58465576171875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,128,0,1,float16,fp8,0,6.280693054199219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,128,0,1,fp8,fp8,0,3.204789479573568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,128,0,1,float16,float16,0,5.611194610595703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,128,0,1,float16,fp8,0,5.418565114339192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,128,0,1,fp8,fp8,0,3.520751953125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,128,0,1,float16,float16,0,5.258639971415202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,128,0,1,fp8,fp8,0,3.4291038513183594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,128,0,1,float16,fp8,0,5.049269358317058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,128,0,1,float16,float16,0,5.759189605712891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,128,0,1,float16,fp8,0,5.3285172780354815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,128,0,1,fp8,fp8,0,3.5458720525105796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,128,0,1,float16,float16,0,5.5894349416097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,128,0,1,fp8,fp8,0,3.1012748082478843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,128,0,1,float16,fp8,0,5.857877095540364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,128,0,1,fp8,fp8,0,15.129488627115885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,128,0,1,float16,float16,0,26.620641072591145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,128,0,1,float16,fp8,0,25.21161651611328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,128,0,1,float16,float16,0,25.208783467610676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,128,0,1,float16,fp8,0,24.010655721028645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,128,0,1,fp8,fp8,0,16.394128163655598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,128,0,1,float16,float16,0,25.21502939860026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,128,0,1,float16,fp8,0,26.517072041829426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,128,0,1,fp8,fp8,0,15.020917256673178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,128,0,1,float16,float16,0,25.907697041829426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,128,0,1,float16,float16,0,13.39523188273112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,128,0,1,fp8,fp8,0,16.066277821858723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,128,0,1,float16,fp8,0,25.592320760091145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,128,0,1,fp8,fp8,0,8.103541056315104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,128,0,1,float16,fp8,0,13.46835708618164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,128,0,1,fp8,fp8,0,7.391071955362956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,128,0,1,float16,float16,0,13.389551798502604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,128,0,1,float16,fp8,0,12.974783579508463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,128,0,1,fp8,fp8,0,7.888064066569011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,128,0,1,float16,float16,0,12.894400278727213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,128,0,1,float16,fp8,0,12.479568481445312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,128,0,1,float16,float16,0,12.720240275065104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,128,0,1,fp8,fp8,0,8.12777582804362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,128,0,1,float16,fp8,0,13.323535919189453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,128,0,1,float16,float16,0,11.985733032226562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,128,0,1,fp8,fp8,0,8.12009048461914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,128,0,1,float16,float16,0,7.459333419799805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,128,0,1,float16,fp8,0,12.433311462402344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,128,0,1,float16,fp8,0,6.141050974527995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,128,0,1,fp8,fp8,0,4.470234553019206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,128,0,1,fp8,fp8,0,3.5167147318522134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,128,0,1,float16,float16,0,6.623546600341797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,128,0,1,float16,fp8,0,6.352197647094727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,128,0,1,float16,float16,0,6.669189453125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,128,0,1,fp8,fp8,0,3.6805760065714517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,128,0,1,float16,fp8,0,6.140634536743164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,128,0,1,float16,float16,0,6.62937609354655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,128,0,1,fp8,fp8,0,4.163098653157552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,128,0,1,float16,fp8,0,6.658698399861653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,128,0,1,float16,float16,0,6.364213307698567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,128,0,1,float16,float16,0,3.526277224222819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,128,0,1,fp8,fp8,0,3.990432103474935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,128,0,1,float16,fp8,0,6.575525283813477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,128,0,1,float16,fp8,0,3.003530820210775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,128,0,1,fp8,fp8,0,2.136293411254883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,128,0,1,float16,float16,0,3.130864143371582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,128,0,1,float16,fp8,0,3.0320053100585938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,128,0,1,fp8,fp8,0,1.9200107256571453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,128,0,1,float16,float16,0,3.1322507858276367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,128,0,1,fp8,fp8,0,1.9149227142333984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,128,0,1,float16,fp8,0,3.105818748474121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,128,0,1,float16,float16,0,3.0520426432291665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,128,0,1,float16,fp8,0,2.181685288747152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,128,0,1,fp8,fp8,0,1.9244640668233235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,128,0,1,float16,float16,0,3.0141814549764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,128,0,1,fp8,fp8,0,1.920687993367513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,128,0,1,float16,fp8,0,2.856106758117676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,128,0,1,fp8,fp8,0,10.969786326090494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,128,0,1,float16,float16,0,17.089269002278645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,128,0,1,float16,float16,0,17.144757588704426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,128,0,1,float16,fp8,0,18.878011067708332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,128,0,1,fp8,fp8,0,11.58740234375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,128,0,1,float16,fp8,0,18.546656290690105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,128,0,1,float16,float16,0,16.458346048990887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,128,0,1,float16,fp8,0,18.282175699869793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,128,0,1,fp8,fp8,0,11.706148783365885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,128,0,1,float16,float16,0,18.22374979654948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,128,0,1,float16,float16,0,10.269807815551758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,128,0,1,fp8,fp8,0,11.84668223063151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,128,0,1,float16,fp8,0,17.367380777994793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,128,0,1,fp8,fp8,0,6.463242848714192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,128,0,1,float16,fp8,0,9.764266967773438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,128,0,1,float16,float16,0,9.539424260457357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,128,0,1,float16,fp8,0,8.692858378092447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,128,0,1,fp8,fp8,0,5.203882535298665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,128,0,1,float16,float16,0,8.639888127644857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,128,0,1,fp8,fp8,0,5.129221280415853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,128,0,1,float16,fp8,0,9.536794662475586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,128,0,1,float16,float16,0,9.51418685913086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,128,0,1,fp8,fp8,0,4.962181409200032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,128,0,1,float16,fp8,0,8.846858978271484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,128,0,1,fp8,fp8,0,5.106586774190267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,128,0,1,float16,float16,0,9.543488184611002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,128,0,1,float16,float16,0,4.851418813069661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,128,0,1,float16,fp8,0,3.8782774607340493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,128,0,1,float16,fp8,0,10.156895955403646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,128,0,1,fp8,fp8,0,3.126720110575358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,128,0,1,float16,float16,0,4.581727981567383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,128,0,1,float16,fp8,0,3.796245257059733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,128,0,1,fp8,fp8,0,2.8240639368693032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,128,0,1,float16,float16,0,4.73853333791097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,128,0,1,fp8,fp8,0,2.6903359095255532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,128,0,1,float16,fp8,0,4.579445203145345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,128,0,1,float16,float16,0,4.745770772298177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,128,0,1,float16,fp8,0,4.8760426839192705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,128,0,1,fp8,fp8,0,2.642522652943929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,128,0,1,float16,float16,0,1.6787145932515461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,128,0,1,float16,fp8,0,4.613338788350423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,128,0,1,float16,float16,0,4.6669918696085615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,128,0,1,fp8,fp8,0,2.8098185857137046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,128,0,1,float16,fp8,0,2.218853314717611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,128,0,1,fp8,fp8,0,1.4828534126281738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,128,0,1,float16,fp8,0,1.9681493441263835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,128,0,1,float16,float16,0,1.7326399485270183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,128,0,1,fp8,fp8,0,1.42848539352417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,128,0,1,fp8,fp8,0,1.4249226252237956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,128,0,1,float16,float16,0,2.4965012868245444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,128,0,1,float16,fp8,0,2.2375200589497886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,128,0,1,float16,float16,0,1.6497440338134766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,128,0,1,float16,fp8,0,2.313968022664388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,128,0,1,fp8,fp8,0,1.4283307393391926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,128,0,1,float16,float16,0,2.105168024698893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,128,0,1,float16,fp8,0,1.6304853757222493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,128,0,1,fp8,fp8,0,1.431978702545166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,128,0,1,fp8,fp8,0,14.64077377319336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,128,0,1,float16,float16,0,25.00282033284505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,128,0,1,float16,fp8,0,25.011952718098957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,128,0,1,float16,float16,0,23.752601623535156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,128,0,1,float16,fp8,0,22.66704559326172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,128,0,1,fp8,fp8,0,15.622005462646484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,128,0,1,float16,float16,0,22.847124735514324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,128,0,1,float16,fp8,0,24.810452779134113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,128,0,1,fp8,fp8,0,14.03985595703125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,128,0,1,float16,float16,0,12.65924326578776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,128,0,1,float16,float16,0,25.064224243164062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,128,0,1,fp8,fp8,0,15.892667134602865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,128,0,1,float16,fp8,0,23.18164316813151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,128,0,1,fp8,fp8,0,8.262837092081705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,128,0,1,float16,fp8,0,12.143044789632162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,128,0,1,float16,float16,0,12.498069763183594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,128,0,1,fp8,fp8,0,8.109605153401693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,128,0,1,float16,fp8,0,11.761163075764975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,128,0,1,float16,float16,0,11.229423522949219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,128,0,1,fp8,fp8,0,7.943210601806641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,128,0,1,float16,fp8,0,12.421744028727213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,128,0,1,float16,float16,0,12.029652913411459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,128,0,1,fp8,fp8,0,7.762997309366862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,128,0,1,float16,fp8,0,11.364191691080729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,128,0,1,float16,float16,0,12.515738169352213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,128,0,1,float16,float16,0,6.059520085652669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,128,0,1,fp8,fp8,0,7.92137082417806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,128,0,1,float16,fp8,0,11.810768127441406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,128,0,1,float16,fp8,0,6.582842508951823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,128,0,1,fp8,fp8,0,3.6631787618001304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,128,0,1,float16,float16,0,6.194976170857747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,128,0,1,fp8,fp8,0,3.693386713663737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,128,0,1,float16,float16,0,5.502480189005534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,128,0,1,float16,fp8,0,5.477280298868815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,128,0,1,float16,fp8,0,6.029594421386719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,128,0,1,fp8,fp8,0,3.295077323913574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,128,0,1,float16,float16,0,5.7241973876953125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,128,0,1,fp8,fp8,0,3.423957188924154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,128,0,1,float16,fp8,0,6.247376124064128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,128,0,1,float16,fp8,0,5.802197138468425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,128,0,1,fp8,fp8,0,3.4629014333089194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,128,0,1,float16,float16,0,5.9606882731119795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,128,0,1,float16,float16,0,3.1995038986206055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,128,0,1,fp8,fp8,0,1.8812692960103352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,128,0,1,float16,float16,0,2.5852373441060386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,128,0,1,float16,fp8,0,2.363050619761149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,128,0,1,float16,fp8,0,2.4187307357788086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,128,0,1,fp8,fp8,0,1.770917256673177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,128,0,1,float16,float16,0,2.164229393005371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,128,0,1,float16,fp8,0,2.406442642211914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,128,0,1,fp8,fp8,0,1.7671945889790852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,128,0,1,float16,float16,0,2.3332053820292153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,128,0,1,fp8,fp8,0,1.7752960522969563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,128,0,1,float16,fp8,0,2.7119572957356772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,128,0,1,float16,float16,0,2.8242934544881186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,128,0,1,float16,fp8,0,2.485194683074951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,128,0,1,fp8,fp8,0,1.7846934000651042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,128,0,1,float16,float16,0,1.4430774052937825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,128,0,1,float16,fp8,0,1.4758240381876628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,128,0,1,fp8,fp8,0,1.336725393931071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,128,0,1,float16,float16,0,1.1784640153249104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,128,0,1,float16,fp8,0,1.2533600330352783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,128,0,1,fp8,fp8,0,1.0061972935994465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,128,0,1,float16,float16,0,1.1300480365753174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,128,0,1,float16,fp8,0,1.3727572758992512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,128,0,1,fp8,fp8,0,1.1930560270945232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,128,0,1,float16,float16,0,1.1305332978566487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,128,0,1,float16,fp8,0,1.133855978647868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,128,0,1,fp8,fp8,0,1.0131999651590984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,128,0,1,float16,float16,0,1.1358293692270915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,128,0,1,fp8,fp8,0,1.013375997543335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,128,0,1,float16,fp8,0,1.2930346330006917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,128,0,1,fp8,fp8,0,8.230735778808594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,128,0,1,float16,fp8,0,13.46020762125651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,128,0,1,float16,float16,0,14.834607442220053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,128,0,1,float16,float16,0,14.55191421508789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,128,0,1,float16,fp8,0,14.109636942545572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,128,0,1,fp8,fp8,0,9.59059206644694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,128,0,1,float16,float16,0,14.661616007486979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,128,0,1,float16,fp8,0,14.573685963948568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,128,0,1,fp8,fp8,0,8.77081044514974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,128,0,1,float16,float16,0,14.12817637125651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,128,0,1,float16,float16,0,7.486602783203125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,128,0,1,fp8,fp8,0,9.434000015258789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,128,0,1,float16,fp8,0,14.434820810953775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,128,0,1,fp8,fp8,0,4.275733311971028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,128,0,1,float16,fp8,0,7.52569580078125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,128,0,1,float16,float16,0,6.932149251302083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,128,0,1,fp8,fp8,0,3.9736000696818032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,128,0,1,float16,fp8,0,7.159936269124349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,128,0,1,float16,float16,0,6.780357360839844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,128,0,1,fp8,fp8,0,4.06382942199707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,128,0,1,float16,fp8,0,7.344170888264974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,128,0,1,fp8,fp8,0,4.034111976623535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,128,0,1,float16,float16,0,7.11729621887207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,128,0,1,float16,fp8,0,7.078586578369141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,128,0,1,float16,float16,0,7.530373255411784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,128,0,1,float16,fp8,0,6.779408137003581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,128,0,1,fp8,fp8,0,4.418154716491699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,128,0,1,float16,float16,0,3.619669278462728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,128,0,1,float16,fp8,0,3.5828641255696616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,128,0,1,fp8,fp8,0,2.5560800234476724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,128,0,1,float16,float16,0,3.438943862915039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,128,0,1,float16,fp8,0,3.2977333068847656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,128,0,1,fp8,fp8,0,2.123194694519043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,128,0,1,float16,float16,0,2.548389275868734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,128,0,1,fp8,fp8,0,2.127232074737549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,128,0,1,float16,fp8,0,3.3102986017862954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,128,0,1,float16,float16,0,3.125760078430176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,128,0,1,fp8,fp8,0,2.060704072316488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,128,0,1,float16,fp8,0,3.0072800318400064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,128,0,1,float16,float16,0,3.5997387568155923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,128,0,1,float16,float16,0,1.6801172892252605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,128,0,1,fp8,fp8,0,2.072314739227295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,128,0,1,float16,fp8,0,3.6469761530558267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,128,0,1,float16,float16,0,1.2692853609720867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,128,0,1,float16,fp8,0,1.9065279960632324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,128,0,1,fp8,fp8,0,1.2011946837107341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,128,0,1,float16,fp8,0,1.479413350423177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,128,0,1,fp8,fp8,0,1.1213280359903972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,128,0,1,float16,fp8,0,1.2689440250396729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,128,0,1,float16,float16,0,1.760053316752116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,128,0,1,float16,float16,0,1.2692853609720867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,128,0,1,fp8,fp8,0,1.3888479868570964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,128,0,1,float16,fp8,0,1.56332794825236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,128,0,1,fp8,fp8,0,1.1296213467915852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,128,0,1,float16,float16,0,1.301466703414917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,128,0,1,float16,fp8,0,1.271999994913737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,128,0,1,float16,float16,0,0.7581226825714111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,128,0,1,fp8,fp8,0,1.5436959266662598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,128,0,1,float16,fp8,0,0.9589333534240723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,128,0,1,float16,float16,0,0.7338986396789551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,128,0,1,fp8,fp8,0,0.6915573279062907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,128,0,1,float16,fp8,0,0.7347466945648193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,128,0,1,float16,float16,0,0.7288106282552084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,128,0,1,fp8,fp8,0,0.793445348739624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,128,0,1,float16,fp8,0,0.7325867017110189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,128,0,1,fp8,fp8,0,0.6561866601308187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,128,0,1,float16,float16,0,0.7303253014882406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,128,0,1,float16,fp8,0,0.7341279983520508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,128,0,1,fp8,fp8,0,0.6576053301493326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,128,0,1,float16,float16,0,0.7292319933573405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,128,0,1,float16,fp8,0,0.7394506931304932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,128,0,1,fp8,fp8,0,0.6604426701863607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,128,0,1,fp8,fp8,0,8.713130950927734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,128,0,1,float16,float16,0,13.13967514038086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,128,0,1,float16,fp8,0,12.285621643066406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,128,0,1,float16,float16,0,13.086682637532553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,128,0,1,fp8,fp8,0,9.033749262491861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,128,0,1,float16,fp8,0,14.955098470052084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,128,0,1,float16,float16,0,13.487263997395834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,128,0,1,float16,fp8,0,15.566635131835938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,128,0,1,fp8,fp8,0,8.79361089070638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,128,0,1,float16,float16,0,14.186336517333984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,128,0,1,float16,float16,0,7.612890879313151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,128,0,1,float16,fp8,0,13.465413411458334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,128,0,1,fp8,fp8,0,9.788309097290039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,128,0,1,fp8,fp8,0,4.961615880330403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,128,0,1,float16,float16,0,5.937503814697266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,128,0,1,float16,fp8,0,8.159562428792318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,128,0,1,float16,fp8,0,6.5822188059488935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,128,0,1,fp8,fp8,0,4.041562716166179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,128,0,1,float16,float16,0,6.609557469685872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,128,0,1,fp8,fp8,0,4.460490544637044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,128,0,1,float16,fp8,0,6.91261355082194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,128,0,1,float16,float16,0,6.614778518676758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,128,0,1,fp8,fp8,0,4.7170718510945635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,128,0,1,float16,fp8,0,6.667360305786133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,128,0,1,float16,float16,0,6.820522944132487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,128,0,1,float16,float16,0,3.258432070414225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,128,0,1,fp8,fp8,0,4.588512102762858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,128,0,1,float16,fp8,0,7.036618550618489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,128,0,1,float16,float16,0,2.892949422200521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,128,0,1,float16,fp8,0,4.003013292948405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,128,0,1,fp8,fp8,0,2.282543977101644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,128,0,1,float16,fp8,0,3.059141476949056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,128,0,1,fp8,fp8,0,2.149690628051758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,128,0,1,float16,fp8,0,2.605386734008789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,128,0,1,fp8,fp8,0,2.0592053731282554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,128,0,1,float16,float16,0,3.16867733001709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,128,0,1,fp8,fp8,0,2.065674622853597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,128,0,1,float16,float16,0,3.239503860473633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,128,0,1,float16,fp8,0,3.125376065572103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,128,0,1,float16,float16,0,3.3297707239786782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,128,0,1,float16,float16,0,1.47216002146403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,128,0,1,fp8,fp8,0,2.187189261118571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,128,0,1,float16,fp8,0,3.0810934702555337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,128,0,1,float16,fp8,0,1.798181374867757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,128,0,1,fp8,fp8,0,1.2015466690063477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,128,0,1,float16,float16,0,1.244048039118449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,128,0,1,float16,fp8,0,1.5223466555277507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,128,0,1,fp8,fp8,0,1.0930559635162354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,128,0,1,float16,float16,0,1.2781706651051838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,128,0,1,float16,fp8,0,1.4092106819152832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,128,0,1,fp8,fp8,0,1.0938080151875813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,128,0,1,float16,float16,0,1.2956960201263428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,128,0,1,float16,fp8,0,1.2906346321105957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,128,0,1,fp8,fp8,0,1.1060480276743572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,128,0,1,float16,float16,0,1.2416373093922932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,128,0,1,float16,float16,0,0.7356906731923422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,128,0,1,fp8,fp8,0,1.158565362294515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,128,0,1,float16,fp8,0,0.7701653639475504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,128,0,1,float16,fp8,0,1.4784587224324544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,128,0,1,fp8,fp8,0,0.6720320383707682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,128,0,1,float16,float16,0,0.686357339223226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,128,0,1,float16,fp8,0,0.702234665552775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,128,0,1,fp8,fp8,0,0.6153493324915568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,128,0,1,float16,float16,0,0.6892960071563721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,128,0,1,float16,fp8,0,0.6970132986704508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,128,0,1,fp8,fp8,0,0.678933302561442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,128,0,1,float16,float16,0,0.6843413511912028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,128,0,1,fp8,fp8,0,0.6190293232599894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,128,0,1,float16,fp8,0,0.7003893057505289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,128,0,1,float16,float16,0,0.6894240379333496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,128,0,1,float16,fp8,0,0.6896746953328451
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,128,0,1,float16,float16,0,0.44509867827097577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,128,0,1,float16,fp8,0,0.42989333470662433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,128,0,1,fp8,fp8,0,0.6232426563898722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,128,0,1,fp8,fp8,0,0.39609066645304364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,128,0,1,float16,float16,0,0.4221280018488566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,128,0,1,fp8,fp8,0,0.3726293245951335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,128,0,1,float16,fp8,0,0.4044160048166911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,128,0,1,float16,float16,0,0.4033493200937907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,128,0,1,float16,fp8,0,0.40919466813405353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,128,0,1,fp8,fp8,0,0.3742133378982544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,128,0,1,float16,float16,0,0.406607985496521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,128,0,1,float16,fp8,0,0.40565331776936847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,128,0,1,fp8,fp8,0,0.3755733172098796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,128,0,1,float16,float16,0,0.4107626676559448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,128,0,1,float16,fp8,0,0.41025598843892414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,128,0,1,fp8,fp8,0,0.3749813238779704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,128,0,1,fp8,fp8,0,5.185407956441243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,128,0,1,float16,fp8,0,7.320032119750977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,128,0,1,float16,float16,0,8.02782948811849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,128,0,1,float16,float16,0,8.076197306315104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,128,0,1,fp8,fp8,0,5.348927815755208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,128,0,1,float16,fp8,0,7.099253336588542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,128,0,1,float16,float16,0,8.66873041788737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,128,0,1,fp8,fp8,0,5.321029345194499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,128,0,1,float16,fp8,0,8.643413543701172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,128,0,1,float16,float16,0,7.980607986450195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,128,0,1,fp8,fp8,0,5.205866813659668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,128,0,1,float16,fp8,0,9.168095906575521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,128,0,1,float16,float16,0,5.107119878133138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,128,0,1,float16,fp8,0,4.887258529663086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,128,0,1,fp8,fp8,0,2.9405654271443686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,128,0,1,float16,float16,0,4.355642636617024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,128,0,1,fp8,fp8,0,2.5616960525512695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,128,0,1,float16,fp8,0,4.108448028564453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,128,0,1,float16,float16,0,3.903114636739095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,128,0,1,fp8,fp8,0,2.570751984914144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,128,0,1,float16,fp8,0,4.361728032430013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,128,0,1,float16,float16,0,4.145413398742676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,128,0,1,fp8,fp8,0,2.5800906817118325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,128,0,1,float16,fp8,0,3.779088020324707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,128,0,1,float16,float16,0,4.039882659912109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,128,0,1,float16,float16,0,1.9355840682983398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,128,0,1,fp8,fp8,0,2.597541332244873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,128,0,1,float16,fp8,0,4.207557360331218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,128,0,1,float16,fp8,0,2.2118345896402993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,128,0,1,float16,float16,0,1.5098613103230794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,128,0,1,fp8,fp8,0,1.5071946779886882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,128,0,1,float16,fp8,0,1.847365379333496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,128,0,1,float16,float16,0,1.5493226051330566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,128,0,1,fp8,fp8,0,1.330901304880778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,128,0,1,float16,fp8,0,1.5074772834777832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,128,0,1,fp8,fp8,0,1.3408160209655762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,128,0,1,fp8,fp8,0,1.8457387288411458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,128,0,1,float16,float16,0,1.722922643025716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,128,0,1,float16,fp8,0,1.5274346669514973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,128,0,1,float16,float16,0,1.1263466676076253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,128,0,1,fp8,fp8,0,1.3522666295369465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,128,0,1,float16,float16,0,1.5180959701538086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,128,0,1,float16,fp8,0,1.5371999740600586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,128,0,1,float16,fp8,0,1.020410696665446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,128,0,1,fp8,fp8,0,0.8029546737670898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,128,0,1,float16,fp8,0,0.8171573479970297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,128,0,1,float16,float16,0,0.9838346640268961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,128,0,1,fp8,fp8,0,0.7189066410064697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,128,0,1,float16,float16,0,0.8087039788564047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,128,0,1,float16,fp8,0,0.8175946871439616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,128,0,1,fp8,fp8,0,0.7198186715443929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,128,0,1,float16,float16,0,0.8092319965362549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,128,0,1,float16,fp8,0,0.8146773179372152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,128,0,1,float16,float16,0,0.8133813540140787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,128,0,1,fp8,fp8,0,0.7257226308186849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,128,0,1,float16,float16,0,0.4861439863840739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,128,0,1,fp8,fp8,0,0.7294399738311768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,128,0,1,float16,fp8,0,0.8153386910756429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,128,0,1,float16,fp8,0,0.4968746503194173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,128,0,1,fp8,fp8,0,0.4517600138982137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,128,0,1,float16,float16,0,0.4582293430964152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,128,0,1,float16,fp8,0,0.4549493392308553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,128,0,1,fp8,fp8,0,0.41177066167195636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,128,0,1,fp8,fp8,0,0.4116799831390381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,128,0,1,float16,fp8,0,0.45660801728566486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,128,0,1,float16,float16,0,0.4574506680170695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,128,0,1,float16,float16,0,0.4599200089772542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,128,0,1,fp8,fp8,0,0.4155946572621663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,128,0,1,float16,fp8,0,0.45626668135325116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,128,0,1,float16,float16,0,0.4599253336588542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,128,0,1,float16,fp8,0,0.4644426504770915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,128,0,1,float16,float16,0,0.29203200340270996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,128,0,1,float16,fp8,0,0.29734933376312256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,128,0,1,fp8,fp8,0,0.2770559986432393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,128,0,1,fp8,fp8,0,0.4167199929555257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,128,0,1,float16,float16,0,0.2732959985733032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,128,0,1,fp8,fp8,0,0.2569440007209778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,128,0,1,float16,fp8,0,0.27191466093063354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,128,0,1,float16,float16,0,0.271232008934021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,128,0,1,float16,fp8,0,0.2760586738586426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,128,0,1,fp8,fp8,0,0.25547200441360474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,128,0,1,float16,float16,0,0.2726293404897054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,128,0,1,float16,fp8,0,0.27502934137980145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,128,0,1,fp8,fp8,0,0.255237340927124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,128,0,1,float16,float16,0,0.27777065833409625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,128,0,1,float16,fp8,0,0.27697600920995075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,128,0,1,fp8,fp8,0,0.25859200954437256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,128,0,1,fp8,fp8,0,5.5820267995198565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,128,0,1,float16,float16,0,7.437056223551433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,128,0,1,float16,fp8,0,8.272533416748047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,128,0,1,float16,float16,0,8.431423823038736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,128,0,1,float16,fp8,0,7.567861557006836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,128,0,1,fp8,fp8,0,5.606378555297852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,128,0,1,float16,float16,0,8.297029495239258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,128,0,1,float16,fp8,0,8.540202458699545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,128,0,1,fp8,fp8,0,5.614751815795898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,128,0,1,float16,float16,0,7.585946400960286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,128,0,1,fp8,fp8,0,6.342906951904297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,128,0,1,float16,float16,0,4.997754732767741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,128,0,1,float16,fp8,0,9.60433578491211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,128,0,1,float16,fp8,0,4.629983901977539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,128,0,1,fp8,fp8,0,3.6809921264648438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,128,0,1,float16,float16,0,3.929306666056315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,128,0,1,float16,fp8,0,4.490640004475911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,128,0,1,fp8,fp8,0,2.7838452657063804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,128,0,1,float16,float16,0,3.921482721964518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,128,0,1,fp8,fp8,0,2.79200013478597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,128,0,1,float16,fp8,0,4.3042294184366865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,128,0,1,float16,fp8,0,3.999914805094401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,128,0,1,fp8,fp8,0,2.8154614766438804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,128,0,1,float16,float16,0,3.8491360346476235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,128,0,1,float16,float16,0,4.087893486022949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,128,0,1,float16,float16,0,1.94760529200236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,128,0,1,fp8,fp8,0,2.838517189025879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,128,0,1,float16,fp8,0,2.149967988332113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,128,0,1,float16,fp8,0,3.948575973510742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,128,0,1,fp8,fp8,0,1.6475733121236165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,128,0,1,float16,float16,0,1.5976319313049316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,128,0,1,float16,fp8,0,1.887722651163737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,128,0,1,fp8,fp8,0,1.4279999732971191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,128,0,1,float16,float16,0,1.5977013905843098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,128,0,1,float16,fp8,0,1.6232105890909831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,128,0,1,float16,float16,0,1.7141812642415364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,128,0,1,fp8,fp8,0,1.4344159762064617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,128,0,1,float16,fp8,0,1.68068265914917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,128,0,1,fp8,fp8,0,1.5486613909403484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,128,0,1,float16,fp8,0,1.646549383799235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,128,0,1,float16,float16,0,1.8726612726847331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,128,0,1,float16,float16,0,0.9351999759674072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,128,0,1,fp8,fp8,0,1.4558240572611492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,128,0,1,fp8,fp8,0,0.8668639659881592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,128,0,1,float16,fp8,0,1.2154239813486736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,128,0,1,float16,float16,0,0.8714719613393148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,128,0,1,float16,fp8,0,0.8475680351257324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,128,0,1,fp8,fp8,0,0.753882646560669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,128,0,1,float16,fp8,0,0.882741371790568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,128,0,1,fp8,fp8,0,0.7571093241373698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,128,0,1,float16,float16,0,0.8656906286875407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,128,0,1,float16,float16,0,0.8505120277404785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,128,0,1,fp8,fp8,0,0.7598026593526205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,128,0,1,float16,fp8,0,0.8677653471628824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,128,0,1,float16,float16,0,0.8575572967529297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,128,0,1,float16,float16,0,0.5113706588745117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,128,0,1,float16,fp8,0,0.5167200167973837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,128,0,1,fp8,fp8,0,0.7658773263295492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,128,0,1,float16,fp8,0,0.8545653025309244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,128,0,1,fp8,fp8,0,0.47011200586954754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,128,0,1,float16,float16,0,0.507365345954895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,128,0,1,fp8,fp8,0,0.4142773151397705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,128,0,1,float16,fp8,0,0.45982933044433594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,128,0,1,float16,float16,0,0.4579999844233195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,128,0,1,fp8,fp8,0,0.41467734177907306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,128,0,1,float16,fp8,0,0.46477333704630536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,128,0,1,float16,float16,0,0.46102933088938397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,128,0,1,float16,fp8,0,0.4662453333536784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,128,0,1,fp8,fp8,0,0.418010671933492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,128,0,1,float16,float16,0,0.47035733858744305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,128,0,1,float16,fp8,0,0.4694399833679199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,128,0,1,float16,float16,0,0.28833067417144775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,128,0,1,float16,fp8,0,0.2947733402252197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,128,0,1,fp8,fp8,0,0.2712480028470357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,128,0,1,fp8,fp8,0,0.4225493272145589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,128,0,1,float16,float16,0,0.2609600027402242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,128,0,1,fp8,fp8,0,0.2423306703567505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,128,0,1,float16,fp8,0,0.2589226762453715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,128,0,1,float16,float16,0,0.26046399275461835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,128,0,1,float16,fp8,0,0.26027733087539673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,128,0,1,fp8,fp8,0,0.2463573416074117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,128,0,1,float16,float16,0,0.2605546712875366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,128,0,1,float16,fp8,0,0.2662400007247925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,128,0,1,fp8,fp8,0,0.24344533681869507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,128,0,1,float16,float16,0,0.2680266698201497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,128,0,1,float16,fp8,0,0.26746666431427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,128,0,1,fp8,fp8,0,0.24765866994857788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,128,0,1,float16,float16,0,0.18131732940673828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,128,0,1,float16,fp8,0,0.18346132834752402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,128,0,1,fp8,fp8,0,0.1716266671816508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,128,0,1,float16,float16,0,0.1625759998957316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,128,0,1,float16,fp8,0,0.1620373328526815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,128,0,1,fp8,fp8,0,0.15330132842063904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,128,0,1,float16,float16,0,0.16289066274960837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,128,0,1,float16,fp8,0,0.1629759967327118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,128,0,1,fp8,fp8,0,0.15435199936230978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,128,0,1,float16,float16,0,0.16266133387883505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,128,0,1,float16,fp8,0,0.16359466314315796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,128,0,1,fp8,fp8,0,0.15380266308784485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,128,0,1,float16,float16,0,0.162773331006368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,128,0,1,fp8,fp8,0,0.15425067146619162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,128,0,1,float16,fp8,0,0.1658506691455841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,128,0,1,fp8,fp8,0,3.7530508041381836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,128,0,1,float16,float16,0,4.651845296223958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,128,0,1,float16,fp8,0,4.9781068166097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,128,0,1,float16,float16,0,4.990725199381511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,128,0,1,fp8,fp8,0,3.777637481689453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,128,0,1,float16,fp8,0,4.858426729838054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,128,0,1,float16,float16,0,5.210373242696126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,128,0,1,float16,fp8,0,5.316752115885417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,128,0,1,fp8,fp8,0,3.782677332560221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,128,0,1,float16,float16,0,4.887317339579265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,128,0,1,fp8,fp8,0,3.8330825169881186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,128,0,1,float16,fp8,0,5.211690584818522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,128,0,1,float16,float16,0,2.6080320676167807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,128,0,1,fp8,fp8,0,2.204042593638102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,128,0,1,float16,fp8,0,2.6308959325154624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,128,0,1,float16,float16,0,2.0863466262817383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,128,0,1,fp8,fp8,0,1.8787840207417805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,128,0,1,float16,fp8,0,2.2121599515279136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,128,0,1,float16,float16,0,2.1059786478678384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,128,0,1,fp8,fp8,0,1.8882773717244465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,128,0,1,float16,fp8,0,2.108837286631266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,128,0,1,float16,float16,0,2.1452746391296387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,128,0,1,fp8,fp8,0,1.8948853810628254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,128,0,1,float16,fp8,0,2.236623922983805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,128,0,1,float16,float16,0,2.3296426137288413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,128,0,1,float16,fp8,0,2.139157295227051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,128,0,1,float16,float16,0,1.2647466659545898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,128,0,1,fp8,fp8,0,1.9152639706929524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,128,0,1,float16,fp8,0,1.3020213445027669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,128,0,1,fp8,fp8,0,1.1375306447347004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,128,0,1,float16,float16,0,1.1580906709035237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,128,0,1,float16,fp8,0,1.087178627649943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,128,0,1,fp8,fp8,0,1.044869343439738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,128,0,1,float16,float16,0,1.0867359638214111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,128,0,1,float16,fp8,0,1.0993493398030598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,128,0,1,fp8,fp8,0,1.0241493384043376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,128,0,1,float16,float16,0,1.089578628540039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,128,0,1,float16,fp8,0,1.0953973134358723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,128,0,1,fp8,fp8,0,0.9837813377380371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,128,0,1,float16,float16,0,1.0992266337076824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,128,0,1,float16,float16,0,0.6424160003662109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,128,0,1,fp8,fp8,0,0.9917279879252116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,128,0,1,float16,fp8,0,1.1046133041381836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,128,0,1,float16,fp8,0,0.6586986780166626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,128,0,1,fp8,fp8,0,0.6030879815419515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,128,0,1,float16,float16,0,0.5767946640650431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,128,0,1,fp8,fp8,0,0.5178720156351725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,128,0,1,float16,fp8,0,0.5822133223215739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,128,0,1,float16,float16,0,0.5792106787363688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,128,0,1,fp8,fp8,0,0.5181119839350382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,128,0,1,float16,fp8,0,0.5767519871393839
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,128,0,1,float16,float16,0,0.5762133200963339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,128,0,1,float16,fp8,0,0.5850826501846313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,128,0,1,fp8,fp8,0,0.5210080146789551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,128,0,1,float16,float16,0,0.5843093395233154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,128,0,1,float16,fp8,0,0.5855413277943929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,128,0,1,float16,float16,0,0.35070399443308514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,128,0,1,float16,fp8,0,0.35914134979248047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,128,0,1,fp8,fp8,0,0.33021867275238037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,128,0,1,fp8,fp8,0,0.5281226634979248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,128,0,1,float16,float16,0,0.31338133414586383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,128,0,1,fp8,fp8,0,0.28784533341725665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,128,0,1,float16,fp8,0,0.31546666224797565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,128,0,1,float16,float16,0,0.3145973285039266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,128,0,1,float16,fp8,0,0.31699732939402264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,128,0,1,fp8,fp8,0,0.2882346709569295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,128,0,1,float16,float16,0,0.31627732515335083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,128,0,1,float16,fp8,0,0.318832000096639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,128,0,1,fp8,fp8,0,0.29181333382924396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,128,0,1,float16,float16,0,0.3200533390045166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,128,0,1,float16,fp8,0,0.3216906587282817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,128,0,1,fp8,fp8,0,0.2948373357454936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,128,0,1,float16,float16,0,0.20377065738042197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,128,0,1,float16,fp8,0,0.20884267489115396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,128,0,1,fp8,fp8,0,0.19363200664520264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,128,0,1,float16,float16,0,0.1824480096499125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,128,0,1,float16,fp8,0,0.18086934089660645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,128,0,1,fp8,fp8,0,0.17017066478729248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,128,0,1,float16,float16,0,0.18005865812301636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,128,0,1,float16,fp8,0,0.18141865730285645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,128,0,1,fp8,fp8,0,0.16884267330169678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,128,0,1,float16,float16,0,0.1816320021947225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,128,0,1,float16,fp8,0,0.1821546753247579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,128,0,1,fp8,fp8,0,0.17297067244847616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,128,0,1,float16,float16,0,0.18243199586868286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,128,0,1,float16,fp8,0,0.18456000089645386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,128,0,1,float16,float16,0,0.13103999694188437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,128,0,1,fp8,fp8,0,0.1743519902229309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,128,0,1,float16,fp8,0,0.13084266583124796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,128,0,1,fp8,fp8,0,0.12572800119717917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,128,0,1,float16,float16,0,0.1165120005607605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,128,0,1,float16,fp8,0,0.11779200037320454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,128,0,1,fp8,fp8,0,0.11365333199501038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,128,0,1,float16,float16,0,0.11753066380818684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,128,0,1,float16,fp8,0,0.11914666493733723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,128,0,1,fp8,fp8,0,0.11372799674669902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,128,0,1,float16,float16,0,0.11718933780988057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,128,0,1,float16,fp8,0,0.11784533659617107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,128,0,1,fp8,fp8,0,0.11405332883199056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,128,0,1,float16,float16,0,0.11793067057927449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,128,0,1,float16,fp8,0,0.11761066317558289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,128,0,1,fp8,fp8,0,0.11334932843844096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,128,0,1,fp8,fp8,0,4.172346750895183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,128,0,1,float16,fp8,0,4.677578608194987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,128,0,1,float16,float16,0,5.107578595479329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,128,0,1,float16,float16,0,4.906149228413899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,128,0,1,float16,fp8,0,4.75271479288737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,128,0,1,fp8,fp8,0,4.431397438049316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,128,0,1,float16,float16,0,5.231866518656413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,128,0,1,float16,fp8,0,5.440757115681966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,128,0,1,fp8,fp8,0,4.449392000834147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,128,0,1,float16,float16,0,5.037786801656087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,128,0,1,float16,fp8,0,5.502533594767253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,128,0,1,fp8,fp8,0,5.002400080362956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,128,0,1,float16,float16,0,2.7253812154134116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,128,0,1,fp8,fp8,0,2.617285410563151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,128,0,1,float16,fp8,0,3.067706743876139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,128,0,1,float16,float16,0,2.4330879847208657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,128,0,1,float16,fp8,0,2.3136587142944336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,128,0,1,fp8,fp8,0,2.096031983693441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,128,0,1,float16,float16,0,2.615498701731364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,128,0,1,float16,fp8,0,2.3781545956929526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,128,0,1,fp8,fp8,0,2.186453342437744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,128,0,1,float16,float16,0,2.267375946044922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,128,0,1,float16,fp8,0,2.3735574086507163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,128,0,1,fp8,fp8,0,2.21122137705485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,128,0,1,float16,float16,0,2.496074676513672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,128,0,1,float16,fp8,0,2.363957405090332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,128,0,1,float16,float16,0,1.3157920042673747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,128,0,1,float16,fp8,0,1.336154619852702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,128,0,1,fp8,fp8,0,2.4933066368103027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,128,0,1,fp8,fp8,0,1.3161653677622478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,128,0,1,float16,float16,0,1.3119680086771648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,128,0,1,fp8,fp8,0,1.0663093725840251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,128,0,1,float16,fp8,0,1.128544012705485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,128,0,1,float16,float16,0,1.141968011856079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,128,0,1,float16,fp8,0,1.1459413369496663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,128,0,1,fp8,fp8,0,1.0834506352742512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,128,0,1,float16,float16,0,1.1786666711171467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,128,0,1,float16,fp8,0,1.150485356648763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,128,0,1,fp8,fp8,0,1.1301546891530354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,128,0,1,float16,float16,0,1.1717759768168132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,128,0,1,float16,fp8,0,1.177722692489624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,128,0,1,float16,float16,0,0.6796159744262695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,128,0,1,float16,fp8,0,0.6604693333307902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,128,0,1,fp8,fp8,0,1.2510453065236409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,128,0,1,fp8,fp8,0,0.6956319808959961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,128,0,1,float16,float16,0,0.5805386702219645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,128,0,1,float16,fp8,0,0.5856586694717407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,128,0,1,fp8,fp8,0,0.5475626786549886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,128,0,1,float16,float16,0,0.5884373188018799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,128,0,1,float16,fp8,0,0.5871946811676025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,128,0,1,fp8,fp8,0,0.565173347791036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,128,0,1,float16,float16,0,0.5899360179901123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,128,0,1,float16,fp8,0,0.59552001953125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,128,0,1,fp8,fp8,0,0.5629546642303467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,128,0,1,float16,float16,0,0.5977439880371094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,128,0,1,float16,fp8,0,0.5957386493682861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,128,0,1,fp8,fp8,0,0.6115786631902059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,128,0,1,float16,float16,0,0.3501280148824056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,128,0,1,float16,fp8,0,0.3447146813074748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,128,0,1,fp8,fp8,0,0.3444586594899495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,128,0,1,float16,float16,0,0.304693341255188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,128,0,1,float16,fp8,0,0.30381866296132404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,128,0,1,fp8,fp8,0,0.2834933400154114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,128,0,1,float16,float16,0,0.30375466744105023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,128,0,1,float16,fp8,0,0.30721066395441693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,128,0,1,fp8,fp8,0,0.2930720051129659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,128,0,1,float16,float16,0,0.30691732962926227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,128,0,1,fp8,fp8,0,0.2908800045649211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,128,0,1,float16,fp8,0,0.31148266792297363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,128,0,1,float16,float16,0,0.3104959925015767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,128,0,1,float16,fp8,0,0.3108266592025757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,128,0,1,float16,float16,0,0.19342933098475137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,128,0,1,float16,fp8,0,0.1887306571006775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,128,0,1,fp8,fp8,0,0.30088533957799274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,128,0,1,fp8,fp8,0,0.18443200985590616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,128,0,1,float16,float16,0,0.165093332529068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,128,0,1,float16,fp8,0,0.16540799538294473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,128,0,1,fp8,fp8,0,0.15337066849072775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,128,0,1,float16,float16,0,0.16571733355522156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,128,0,1,float16,fp8,0,0.16595199704170227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,128,0,1,fp8,fp8,0,0.15531733632087708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,128,0,1,float16,float16,0,0.164901336034139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,128,0,1,float16,fp8,0,0.16530133287111917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,128,0,1,fp8,fp8,0,0.15691733360290527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,128,0,1,float16,float16,0,0.169429341952006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,128,0,1,float16,fp8,0,0.16859199603398642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,128,0,1,fp8,fp8,0,0.16193600495656332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,128,0,1,float16,float16,0,0.10814932982126872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,128,0,1,float16,fp8,0,0.10680533448855083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,128,0,1,float16,float16,0,0.0920853316783905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,128,0,1,fp8,fp8,0,0.10540266831715901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,128,0,1,fp8,fp8,0,0.08507200082143147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,128,0,1,float16,fp8,0,0.09136000275611877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,128,0,1,float16,float16,0,0.09158399701118469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,128,0,1,float16,fp8,0,0.09118400017420451
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,128,0,1,fp8,fp8,0,0.08646933237711589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,128,0,1,float16,float16,0,0.09088533123334248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,128,0,1,float16,fp8,0,0.09311466415723164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,128,0,1,fp8,fp8,0,0.08487466971079509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,128,0,1,float16,float16,0,0.09382399916648865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,128,0,1,float16,fp8,0,0.09315733114878337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,128,0,1,float16,float16,0,0.06192000210285187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,128,0,1,fp8,fp8,0,0.08840533097585042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,128,0,1,float16,fp8,0,0.06239999830722809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,128,0,1,fp8,fp8,0,0.06422933439413707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,128,0,1,float16,float16,0,0.05740800003210703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,128,0,1,float16,fp8,0,0.0581226646900177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,128,0,1,fp8,fp8,0,0.053397332628568016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,128,0,1,float16,fp8,0,0.058634668588638306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,128,0,1,float16,float16,0,0.05821333328882853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,128,0,1,fp8,fp8,0,0.053946668903032936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,128,0,1,float16,float16,0,0.05816533168156942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,128,0,1,float16,fp8,0,0.05804799993832906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,128,0,1,fp8,fp8,0,0.05522133409976959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,128,0,1,float16,float16,0,0.05808533231417338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,128,0,1,float16,fp8,0,0.05819199979305267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,128,0,1,fp8,fp8,0,0.0561653325955073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,128,0,1,fp8,fp8,0,3.6032638549804688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,128,0,1,float16,fp8,0,3.758453369140625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,128,0,1,float16,float16,0,3.8851467768351235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,128,0,1,float16,float16,0,3.9039039611816406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,128,0,1,float16,fp8,0,3.842975934346517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,128,0,1,fp8,fp8,0,3.8540798823038735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,128,0,1,float16,float16,0,3.921450614929199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,128,0,1,float16,fp8,0,3.9796533584594727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,128,0,1,fp8,fp8,0,3.954064051310221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,128,0,1,float16,float16,0,3.9813172022501626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,128,0,1,float16,fp8,0,3.9381707509358725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,128,0,1,float16,float16,0,2.3464694023132324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,128,0,1,float16,fp8,0,2.5038933753967285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,128,0,1,fp8,fp8,0,4.396698633829753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,128,0,1,fp8,fp8,0,2.3328213691711426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,128,0,1,float16,float16,0,1.8738239606221516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,128,0,1,float16,fp8,0,1.8714772860209148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,128,0,1,fp8,fp8,0,1.805605411529541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,128,0,1,float16,float16,0,1.9092639287312825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,128,0,1,float16,fp8,0,1.9127200444539387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,128,0,1,fp8,fp8,0,1.907898743947347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,128,0,1,float16,float16,0,1.9487093289693196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,128,0,1,float16,fp8,0,1.9340319633483887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,128,0,1,fp8,fp8,0,1.9414347012837727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,128,0,1,float16,float16,0,1.984005292256673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,128,0,1,float16,float16,0,1.1465226809183757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,128,0,1,float16,fp8,0,1.1479520003000896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,128,0,1,float16,fp8,0,1.9974932670593262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,128,0,1,fp8,fp8,0,2.1992692947387695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,128,0,1,fp8,fp8,0,1.154362678527832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,128,0,1,float16,float16,0,0.9551359812418619
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,128,0,1,float16,fp8,0,0.9552960395812988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,128,0,1,fp8,fp8,0,0.9153760274251302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,128,0,1,float16,float16,0,0.969055970509847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,128,0,1,float16,fp8,0,0.974501371383667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,128,0,1,fp8,fp8,0,0.9490986665089926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,128,0,1,float16,float16,0,0.975221316019694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,128,0,1,float16,fp8,0,0.9794399738311768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,128,0,1,fp8,fp8,0,0.9487253030141195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,128,0,1,float16,float16,0,0.9929333527882894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,128,0,1,float16,fp8,0,0.9920266469319662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,128,0,1,float16,float16,0,0.5795679887135824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,128,0,1,float16,fp8,0,0.5733226537704468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,128,0,1,fp8,fp8,0,1.0916746457417805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,128,0,1,fp8,fp8,0,0.5840160051981608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,128,0,1,float16,float16,0,0.4896746476491292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,128,0,1,float16,fp8,0,0.4907999833424886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,128,0,1,fp8,fp8,0,0.46827733516693115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,128,0,1,float16,float16,0,0.49534932772318524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,128,0,1,float16,fp8,0,0.49353599548339844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,128,0,1,fp8,fp8,0,0.4875413179397583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,128,0,1,float16,float16,0,0.4976160128911336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,128,0,1,fp8,fp8,0,0.4784746567408244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,128,0,1,float16,fp8,0,0.49901334444681805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,128,0,1,float16,float16,0,0.5101333459218343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,128,0,1,float16,fp8,0,0.5054346720377604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,128,0,1,float16,float16,0,0.3020586570103963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,128,0,1,fp8,fp8,0,0.53438933690389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,128,0,1,float16,fp8,0,0.2967360019683838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,128,0,1,float16,float16,0,0.25564799706141156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,128,0,1,fp8,fp8,0,0.301146666208903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,128,0,1,fp8,fp8,0,0.24290666977564493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,128,0,1,float16,fp8,0,0.25541333357493085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,128,0,1,float16,float16,0,0.25674132506052655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,128,0,1,float16,fp8,0,0.25629866123199463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,128,0,1,fp8,fp8,0,0.2533866763114929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,128,0,1,float16,float16,0,0.2583893338839213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,128,0,1,float16,fp8,0,0.25919467210769653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,128,0,1,fp8,fp8,0,0.25594133138656616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,128,0,1,float16,float16,0,0.2646399935086568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,128,0,1,float16,fp8,0,0.26308266321818036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,128,0,1,float16,float16,0,0.16586132844289145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,128,0,1,fp8,fp8,0,0.25982399781545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,128,0,1,float16,fp8,0,0.16312000155448914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,128,0,1,fp8,fp8,0,0.16318933169047037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,128,0,1,float16,float16,0,0.13730133573214212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,128,0,1,float16,fp8,0,0.13758933544158936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,128,0,1,fp8,fp8,0,0.13129599889119467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,128,0,1,float16,float16,0,0.13889599839846292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,128,0,1,float16,fp8,0,0.1395199994246165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,128,0,1,fp8,fp8,0,0.13099199533462524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,128,0,1,float16,float16,0,0.13893333077430725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,128,0,1,float16,fp8,0,0.14087466398874918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,128,0,1,fp8,fp8,0,0.13424000144004822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,128,0,1,float16,float16,0,0.1434933344523112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,128,0,1,float16,fp8,0,0.1443893313407898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,128,0,1,float16,float16,0,0.09160000085830688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,128,0,1,float16,fp8,0,0.09169600407282512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,128,0,1,fp8,fp8,0,0.13757866621017456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,128,0,1,fp8,fp8,0,0.09144000212351482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,128,0,1,float16,float16,0,0.07903466622034709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,128,0,1,float16,fp8,0,0.07597333192825317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,128,0,1,fp8,fp8,0,0.07188799977302551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,128,0,1,float16,float16,0,0.07612800101439159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,128,0,1,float16,fp8,0,0.0745119998852412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,128,0,1,fp8,fp8,0,0.0726560006539027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,128,0,1,float16,float16,0,0.07663999994595845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,128,0,1,float16,fp8,0,0.07659199833869934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,128,0,1,fp8,fp8,0,0.07143466671307881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,128,0,1,float16,float16,0,0.07911466558774312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,128,0,1,float16,fp8,0,0.07885866860548656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,128,0,1,fp8,fp8,0,0.07482133309046428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,128,0,1,float16,float16,0,0.05256533126036326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,128,0,1,float16,fp8,0,0.05207466582457224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,128,0,1,fp8,fp8,0,0.05393599967161814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,128,0,1,float16,float16,0,0.0483893354733785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,128,0,1,float16,fp8,0,0.049600000182787575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,128,0,1,fp8,fp8,0,0.04568533102671305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,128,0,1,float16,float16,0,0.04809600114822388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,128,0,1,float16,fp8,0,0.047728002071380615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,128,0,1,fp8,fp8,0,0.04691733419895172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,128,0,1,float16,float16,0,0.04778666794300079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,128,0,1,float16,fp8,0,0.04786666731039683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,128,0,1,fp8,fp8,0,0.04679466784000397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,128,0,1,float16,float16,0,0.04980266590913137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,128,0,1,float16,fp8,0,0.04990399877230326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,128,0,1,fp8,fp8,0,0.04783466458320618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,128,0,1,float16,float16,0,0.03350399931271871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,128,0,1,float16,fp8,0,0.033530667424201965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,128,0,1,fp8,fp8,0,0.03379733363787333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,128,0,1,float16,float16,0,0.031210665901501972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,128,0,1,float16,fp8,0,0.03150933235883713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,128,0,1,fp8,fp8,0,0.031109333038330078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,128,0,1,float16,float16,0,0.03143999973932902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,128,0,1,float16,fp8,0,0.03148799886306127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,128,0,1,fp8,fp8,0,0.030618667602539062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,128,0,1,float16,float16,0,0.031370667119820915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,128,0,1,float16,fp8,0,0.03147733211517334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,128,0,1,fp8,fp8,0,0.03142933299144109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,128,0,1,float16,float16,0,0.031685332457224526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,128,0,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,128,0,1,fp8,fp8,0,0.03145066648721695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,128,0,1,fp8,fp8,0,1.6360267003377278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,128,0,1,float16,float16,0,1.69813871383667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,128,0,1,float16,fp8,0,1.7030933698018391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,128,0,1,float16,float16,0,1.7084426879882812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,128,0,1,float16,fp8,0,1.7285653750101726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,128,0,1,fp8,fp8,0,1.7413066228230794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,128,0,1,float16,fp8,0,1.7413919766743977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,128,0,1,fp8,fp8,0,1.8081332842508953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,128,0,1,float16,float16,0,1.8096426328023274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,128,0,1,float16,float16,0,1.7441280682881672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,128,0,1,float16,float16,0,1.0489813486735027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,128,0,1,float16,fp8,0,1.036784013112386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,128,0,1,float16,fp8,0,1.7840320269266765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,128,0,1,fp8,fp8,0,2.0330773989359536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,128,0,1,float16,float16,0,0.8624693552652994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,128,0,1,fp8,fp8,0,1.0760479768117268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,128,0,1,float16,fp8,0,0.8611253102620443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,128,0,1,fp8,fp8,0,0.8314879735310873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,128,0,1,float16,float16,0,0.874288002649943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,128,0,1,float16,fp8,0,0.8738186359405518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,128,0,1,fp8,fp8,0,0.85426131884257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,128,0,1,float16,float16,0,0.8768213589986166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,128,0,1,float16,fp8,0,0.8879199822743734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,128,0,1,fp8,fp8,0,0.8638506730397543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,128,0,1,float16,float16,0,0.9008479913075765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,128,0,1,float16,fp8,0,0.8973706563313802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,128,0,1,float16,float16,0,0.5355733235677084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,128,0,1,float16,fp8,0,0.5288800001144409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,128,0,1,fp8,fp8,0,1.0124639670054119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,128,0,1,fp8,fp8,0,0.5427840153376261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,128,0,1,float16,float16,0,0.4430239995320638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,128,0,1,float16,fp8,0,0.44167999426523846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,128,0,1,fp8,fp8,0,0.4241280158360799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,128,0,1,float16,fp8,0,0.4474986791610718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,128,0,1,float16,float16,0,0.4483413298924764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,128,0,1,fp8,fp8,0,0.4344586531321208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,128,0,1,float16,float16,0,0.4514400164286296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,128,0,1,float16,fp8,0,0.4501226743062337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,128,0,1,fp8,fp8,0,0.4416586558024089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,128,0,1,float16,float16,0,0.46246933937072754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,128,0,1,float16,float16,0,0.2792853315671285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,128,0,1,float16,fp8,0,0.4596800009409587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,128,0,1,fp8,fp8,0,0.4915999968846639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,128,0,1,float16,fp8,0,0.27446399132410687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,128,0,1,fp8,fp8,0,0.2804373304049174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,128,0,1,float16,float16,0,0.23305600881576538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,128,0,1,float16,fp8,0,0.23127466440200806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,128,0,1,fp8,fp8,0,0.220960001150767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,128,0,1,float16,float16,0,0.23451733589172363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,128,0,1,float16,fp8,0,0.23522132635116577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,128,0,1,fp8,fp8,0,0.22578666607538858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,128,0,1,float16,float16,0,0.2363199989000956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,128,0,1,float16,fp8,0,0.23640000820159912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,128,0,1,fp8,fp8,0,0.23060800631841025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,128,0,1,float16,float16,0,0.24168533086776733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,128,0,1,float16,fp8,0,0.23972799380620322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,128,0,1,float16,float16,0,0.15152532855669656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,128,0,1,fp8,fp8,0,0.23747734228769937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,128,0,1,float16,fp8,0,0.14851733048756918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,128,0,1,float16,float16,0,0.12282666563987732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,128,0,1,fp8,fp8,0,0.15204800168673197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,128,0,1,float16,fp8,0,0.12333333492279053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,128,0,1,fp8,fp8,0,0.11745599905649821
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,128,0,1,float16,float16,0,0.1242026686668396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,128,0,1,float16,fp8,0,0.1222826639811198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,128,0,1,fp8,fp8,0,0.11776533722877502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,128,0,1,float16,float16,0,0.12492799758911133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,128,0,1,float16,fp8,0,0.12306666374206543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,128,0,1,fp8,fp8,0,0.12054399649302165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,128,0,1,float16,float16,0,0.1276533305644989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,128,0,1,float16,fp8,0,0.12757866581281027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,128,0,1,fp8,fp8,0,0.12518399953842163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,128,0,1,float16,float16,0,0.08575999736785889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,128,0,1,float16,fp8,0,0.08335999647776286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,128,0,1,fp8,fp8,0,0.08647466699282329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,128,0,1,float16,float16,0,0.06889600058396657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,128,0,1,float16,fp8,0,0.06995733578999837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,128,0,1,fp8,fp8,0,0.06648000081380208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,128,0,1,float16,float16,0,0.06905599931875865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,128,0,1,float16,fp8,0,0.0705973356962204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,128,0,1,fp8,fp8,0,0.06592533489068349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,128,0,1,float16,float16,0,0.070592001080513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,128,0,1,float16,fp8,0,0.0705386648575465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,128,0,1,fp8,fp8,0,0.06539200246334076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,128,0,1,float16,float16,0,0.07111466427644093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,128,0,1,float16,fp8,0,0.07044800122578938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,128,0,1,fp8,fp8,0,0.06923200190067291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,128,0,1,float16,float16,0,0.04646400113900503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,128,0,1,float16,fp8,0,0.046725332736968994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,128,0,1,fp8,fp8,0,0.04991999765237173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,128,0,1,float16,float16,0,0.042650664846102394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,128,0,1,float16,fp8,0,0.041840001940727234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,128,0,1,fp8,fp8,0,0.0418453315893809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,128,0,1,float16,float16,0,0.04381333291530609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,128,0,1,float16,fp8,0,0.04404266675313314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,128,0,1,fp8,fp8,0,0.04159999887148539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,128,0,1,float16,float16,0,0.04367466767628988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,128,0,1,float16,fp8,0,0.04366933306058248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,128,0,1,fp8,fp8,0,0.04167999823888143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,128,0,1,float16,float16,0,0.043621331453323364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,128,0,1,float16,fp8,0,0.04401599864164988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,128,0,1,fp8,fp8,0,0.041519999504089355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,128,0,1,float16,float16,0,0.02977599948644638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,128,0,1,float16,fp8,0,0.03125333289305369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,128,0,1,fp8,fp8,0,0.031445334355036415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,128,0,1,float16,float16,0,0.02755733331044515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,128,0,1,float16,fp8,0,0.02829866607983907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,128,0,1,fp8,fp8,0,0.029418667157491047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,128,0,1,float16,float16,0,0.027952000498771667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,128,0,1,float16,fp8,0,0.028538666665554047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,128,0,1,fp8,fp8,0,0.027263998985290527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,128,0,1,float16,float16,0,0.02938133229811986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,128,0,1,float16,fp8,0,0.02942933390537898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,128,0,1,fp8,fp8,0,0.027701333165168762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,128,0,1,float16,float16,0,0.029663999875386555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,128,0,1,float16,fp8,0,0.02924799919128418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,128,0,1,fp8,fp8,0,0.029365333418051403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,128,0,1,float16,float16,0,0.02536533276240031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,128,0,1,float16,fp8,0,0.02409599969784419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,128,0,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,128,0,1,float16,fp8,0,0.023408000667889912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,128,0,1,float16,float16,0,0.023567999402681988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,128,0,1,fp8,fp8,0,0.02348266790310542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,128,0,1,float16,float16,0,0.02345066765944163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,128,0,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,128,0,1,fp8,fp8,0,0.023423999547958374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,128,0,1,float16,float16,0,0.02329600105683009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,128,0,1,float16,fp8,0,0.023530667026837666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,128,0,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,128,0,1,float16,float16,0,0.024853333830833435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,128,0,1,float16,fp8,0,0.02310933421055476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,128,0,1,fp8,fp8,0,0.023189333577950794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,128,0,1,float16,float16,0,0.9337973594665527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,128,0,1,float16,fp8,0,0.927903970082601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,128,0,1,fp8,fp8,0,0.8993173440297445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,128,0,1,float16,float16,0,0.9426986376444498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,128,0,1,fp8,fp8,0,0.9297973314921061
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,128,0,1,float16,fp8,0,0.9421066443125407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,128,0,1,float16,fp8,0,0.9479040304819742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,128,0,1,fp8,fp8,0,0.9439413547515869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,128,0,1,float16,float16,0,0.9560480117797852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,128,0,1,float16,float16,0,0.9776106675465902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,128,0,1,float16,fp8,0,0.5559200048446655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,128,0,1,float16,float16,0,0.5686986843744913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,128,0,1,float16,fp8,0,0.9713119665781657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,128,0,1,fp8,fp8,0,1.0686453183492024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,128,0,1,fp8,fp8,0,0.5740106503168741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,128,0,1,float16,float16,0,0.476911981900533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,128,0,1,float16,fp8,0,0.47352532545725506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,128,0,1,fp8,fp8,0,0.46083199977874756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,128,0,1,float16,float16,0,0.48006399472554523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,128,0,1,float16,fp8,0,0.48180798689524335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,128,0,1,fp8,fp8,0,0.4670933485031128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,128,0,1,float16,float16,0,0.48744531472524005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,128,0,1,float16,fp8,0,0.4853493372599284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,128,0,1,fp8,fp8,0,0.4763466517130534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,128,0,1,float16,float16,0,0.4976319869359334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,128,0,1,float16,fp8,0,0.4943679968516032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,128,0,1,float16,float16,0,0.2895626624425252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,128,0,1,float16,fp8,0,0.2868640025456746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,128,0,1,fp8,fp8,0,0.5157599846522013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,128,0,1,fp8,fp8,0,0.2955626646677653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,128,0,1,float16,float16,0,0.24649600187937418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,128,0,1,float16,fp8,0,0.24684800704320273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,128,0,1,fp8,fp8,0,0.23691733678181967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,128,0,1,float16,float16,0,0.24893865982691446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,128,0,1,float16,fp8,0,0.2477440039316813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,128,0,1,fp8,fp8,0,0.24445333083470663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,128,0,1,float16,float16,0,0.25060800711313885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,128,0,1,float16,fp8,0,0.2516106764475505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,128,0,1,fp8,fp8,0,0.2422986626625061
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,128,0,1,float16,float16,0,0.2572373350461324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,128,0,1,float16,fp8,0,0.25659199555714923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,128,0,1,fp8,fp8,0,0.24918399254480997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,128,0,1,float16,float16,0,0.1557973325252533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,128,0,1,float16,fp8,0,0.1550826629002889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,128,0,1,fp8,fp8,0,0.1567093332608541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,128,0,1,float16,float16,0,0.1299626628557841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,128,0,1,float16,fp8,0,0.13186132907867432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,128,0,1,fp8,fp8,0,0.12479466199874878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,128,0,1,float16,float16,0,0.1337440013885498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,128,0,1,float16,fp8,0,0.13341333468755087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,128,0,1,fp8,fp8,0,0.12771200140317282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,128,0,1,float16,float16,0,0.13327999909718832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,128,0,1,float16,fp8,0,0.13260799646377563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,128,0,1,fp8,fp8,0,0.12846933801968893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,128,0,1,float16,float16,0,0.13754133383433023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,128,0,1,float16,fp8,0,0.136053333679835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,128,0,1,fp8,fp8,0,0.1329866647720337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,128,0,1,float16,float16,0,0.08514133095741272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,128,0,1,float16,fp8,0,0.08434666196505229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,128,0,1,fp8,fp8,0,0.0883893370628357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,128,0,1,float16,float16,0,0.07067733506361644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,128,0,1,float16,fp8,0,0.0724480003118515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,128,0,1,fp8,fp8,0,0.06643199920654297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,128,0,1,float16,float16,0,0.07275199890136719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,128,0,1,float16,fp8,0,0.07193066676457723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,128,0,1,fp8,fp8,0,0.06720000008742015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,128,0,1,float16,float16,0,0.07256533205509186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,128,0,1,float16,fp8,0,0.07254933317502339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,128,0,1,fp8,fp8,0,0.06797333558400472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,128,0,1,float16,float16,0,0.07460799813270569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,128,0,1,float16,fp8,0,0.0737066666285197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,128,0,1,fp8,fp8,0,0.07074133555094402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,128,0,1,float16,float16,0,0.050245334704717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,128,0,1,float16,fp8,0,0.048623998959859215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,128,0,1,fp8,fp8,0,0.05194133520126343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,128,0,1,float16,float16,0,0.04524800181388855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,128,0,1,float16,fp8,0,0.04577066500981649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,128,0,1,fp8,fp8,0,0.04384533564249674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,128,0,1,float16,float16,0,0.045509333411852516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,128,0,1,float16,fp8,0,0.045007998744646706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,128,0,1,fp8,fp8,0,0.043098668257395424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,128,0,1,float16,float16,0,0.0462773342927297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,128,0,1,float16,fp8,0,0.04422933359940847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,128,0,1,fp8,fp8,0,0.04293333490689596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,128,0,1,float16,float16,0,0.046021332343419395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,128,0,1,float16,fp8,0,0.0461706668138504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,128,0,1,fp8,fp8,0,0.04409599800904592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,128,0,1,float16,float16,0,0.031221332649389904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,128,0,1,float16,fp8,0,0.0315733328461647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,128,0,1,fp8,fp8,0,0.03137599925200144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,128,0,1,float16,float16,0,0.02918400118748347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,128,0,1,float16,fp8,0,0.029813334345817566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,128,0,1,fp8,fp8,0,0.02940800040960312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,128,0,1,float16,float16,0,0.029440000653266907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,128,0,1,float16,fp8,0,0.02996266633272171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,128,0,1,fp8,fp8,0,0.029301332930723827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,128,0,1,float16,float16,0,0.02938666691382726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,128,0,1,float16,fp8,0,0.029258665939172108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,128,0,1,fp8,fp8,0,0.029466666281223297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,128,0,1,float16,float16,0,0.029669334491093952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,128,0,1,float16,fp8,0,0.029520000020662945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,128,0,1,fp8,fp8,0,0.029258665939172108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,128,0,1,float16,float16,0,0.023045333723227184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,128,0,1,float16,fp8,0,0.023050665855407715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,128,0,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,128,0,1,float16,float16,0,0.021189334491888683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,128,0,1,float16,fp8,0,0.021407999098300934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,128,0,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,128,0,1,float16,float16,0,0.021295999487241108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,128,0,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,128,0,1,fp8,fp8,0,0.021386665602525074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,128,0,1,float16,float16,0,0.021295999487241108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,128,0,1,float16,fp8,0,0.021381333470344543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,128,0,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,128,0,1,float16,float16,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,128,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,128,0,1,float16,float16,0,0.019093333433071773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,128,0,1,float16,fp8,0,0.021690666675567627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,128,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,128,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,128,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,128,0,1,float16,fp8,0,0.017792000124851864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,128,0,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,128,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,128,0,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,128,0,1,float16,float16,0,0.01897066707412402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,128,0,1,fp8,fp8,0,0.01798933371901512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,128,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,128,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,128,0,1,float16,float16,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,128,0,1,fp8,fp8,0,0.01942933350801468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,128,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,128,0,1,float16,float16,0,0.6071786483128866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,128,0,1,float16,fp8,0,0.6037973165512085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,128,0,1,fp8,fp8,0,0.5900213321050009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,128,0,1,float16,float16,0,0.6105173428853353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,128,0,1,float16,fp8,0,0.6131680011749268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,128,0,1,fp8,fp8,0,0.6023146708806356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,128,0,1,float16,float16,0,0.616373340288798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,128,0,1,float16,fp8,0,0.6122560103734335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,128,0,1,fp8,fp8,0,0.6039679845174154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,128,0,1,float16,float16,0,0.6291040182113647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,128,0,1,float16,fp8,0,0.6189173460006714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,128,0,1,fp8,fp8,0,0.6410026550292969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,128,0,1,float16,float16,0,0.35917866230010986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,128,0,1,float16,fp8,0,0.3510773181915283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,128,0,1,fp8,fp8,0,0.36019734541575116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,128,0,1,float16,float16,0,0.3148426612218221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,128,0,1,float16,fp8,0,0.3110026717185974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,128,0,1,fp8,fp8,0,0.30291734139124554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,128,0,1,float16,float16,0,0.31618666648864746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,128,0,1,float16,fp8,0,0.3161333401997884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,128,0,1,fp8,fp8,0,0.30738667647043866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,128,0,1,float16,float16,0,0.31624533732732135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,128,0,1,float16,fp8,0,0.3131519953409831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,128,0,1,fp8,fp8,0,0.31116267045338947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,128,0,1,float16,float16,0,0.32230399052302044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,128,0,1,float16,fp8,0,0.3197386662165324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,128,0,1,float16,float16,0,0.18913066387176514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,128,0,1,fp8,fp8,0,0.3136799931526184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,128,0,1,float16,fp8,0,0.18535999457041422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,128,0,1,fp8,fp8,0,0.1905226707458496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,128,0,1,float16,float16,0,0.1648906668027242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,128,0,1,float16,fp8,0,0.16523200273513794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,128,0,1,float16,float16,0,0.16539733608563742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,128,0,1,fp8,fp8,0,0.1600320041179657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,128,0,1,float16,fp8,0,0.1632266640663147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,128,0,1,fp8,fp8,0,0.1604159971078237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,128,0,1,float16,float16,0,0.1685546636581421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,128,0,1,fp8,fp8,0,0.160863995552063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,128,0,1,float16,float16,0,0.17061867316563925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,128,0,1,float16,fp8,0,0.16715733210245767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,128,0,1,float16,fp8,0,0.16900267203648886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,128,0,1,float16,float16,0,0.10195199648539226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,128,0,1,fp8,fp8,0,0.16394133369127908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,128,0,1,float16,fp8,0,0.09959999720255534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,128,0,1,fp8,fp8,0,0.10397332906723022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,128,0,1,float16,float16,0,0.08911466598510742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,128,0,1,float16,fp8,0,0.088837335507075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,128,0,1,fp8,fp8,0,0.08384000261624654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,128,0,1,float16,float16,0,0.08904533584912618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,128,0,1,float16,fp8,0,0.08861866593360901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,128,0,1,fp8,fp8,0,0.08498666683832805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,128,0,1,float16,float16,0,0.08877866466840108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,128,0,1,float16,fp8,0,0.09014933307965596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,128,0,1,fp8,fp8,0,0.08461866776148479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,128,0,1,float16,float16,0,0.09360000491142273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,128,0,1,float16,fp8,0,0.09044800202051799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,128,0,1,fp8,fp8,0,0.08970133463541667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,128,0,1,float16,float16,0,0.05710933109124502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,128,0,1,fp8,fp8,0,0.059248000383377075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,128,0,1,float16,fp8,0,0.05789333085219065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,128,0,1,float16,float16,0,0.052671998739242554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,128,0,1,float16,fp8,0,0.05309866865475973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,128,0,1,fp8,fp8,0,0.05043200155099233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,128,0,1,float16,float16,0,0.052015999952952065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,128,0,1,float16,fp8,0,0.05193066596984863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,128,0,1,fp8,fp8,0,0.05101866523424784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,128,0,1,float16,float16,0,0.05287999908129374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,128,0,1,float16,fp8,0,0.05197333296140035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,128,0,1,fp8,fp8,0,0.050144001841545105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,128,0,1,float16,float16,0,0.05407466491063436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,128,0,1,float16,fp8,0,0.053802669048309326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,128,0,1,fp8,fp8,0,0.05065600077311198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,128,0,1,float16,float16,0,0.03565866748491923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,128,0,1,float16,fp8,0,0.03551466763019562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,128,0,1,float16,float16,0,0.03365866591533025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,128,0,1,fp8,fp8,0,0.03554133325815201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,128,0,1,float16,fp8,0,0.035386666655540466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,128,0,1,fp8,fp8,0,0.03339199970165888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,128,0,1,float16,float16,0,0.0353973334034284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,128,0,1,float16,fp8,0,0.03400533397992452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,128,0,1,fp8,fp8,0,0.033786666889985405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,128,0,1,float16,float16,0,0.035504000882307686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,128,0,1,float16,fp8,0,0.035599999129772186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,128,0,1,fp8,fp8,0,0.03349866718053818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,128,0,1,float16,float16,0,0.03558400024970373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,128,0,1,float16,fp8,0,0.035360001027584076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,128,0,1,fp8,fp8,0,0.033301333586374916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,128,0,1,float16,float16,0,0.023503998915354412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,128,0,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,128,0,1,fp8,fp8,0,0.02495466669400533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,128,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,128,0,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,128,0,1,fp8,fp8,0,0.02317333221435547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,128,0,1,float16,float16,0,0.02332266668478648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,128,0,1,float16,fp8,0,0.023242667317390442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,128,0,1,fp8,fp8,0,0.02362666775782903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,128,0,1,float16,float16,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,128,0,1,float16,fp8,0,0.023413332800070446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,128,0,1,fp8,fp8,0,0.023200000325838726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,128,0,1,float16,float16,0,0.023141334454218548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,128,0,1,float16,fp8,0,0.025514667232831318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,128,0,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,128,0,1,float16,float16,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,128,0,1,float16,fp8,0,0.018853332847356796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,128,0,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,128,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,128,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,128,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,128,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,128,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,128,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,128,0,1,fp8,fp8,0,0.019424000134070713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,128,0,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,128,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,128,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,128,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,128,0,1,float16,fp8,0,0.01759999990463257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,128,0,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,128,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,128,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,128,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,128,0,1,float16,float16,0,0.45044267177581787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,128,0,1,float16,fp8,0,0.45214398701985675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,128,0,1,fp8,fp8,0,0.4336693286895752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,128,0,1,float16,float16,0,0.45505066712697345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,128,0,1,float16,fp8,0,0.4543786843617757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,128,0,1,fp8,fp8,0,0.44091200828552246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,128,0,1,float16,float16,0,0.45181866486867267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,128,0,1,float16,fp8,0,0.45373332500457764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,128,0,1,fp8,fp8,0,0.44039467970530194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,128,0,1,fp8,fp8,0,0.4461013476053874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,128,0,1,float16,fp8,0,0.4620000123977661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,128,0,1,float16,float16,0,0.4568479855855306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,128,0,1,float16,float16,0,0.25909332434336346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,128,0,1,float16,fp8,0,0.25439467032750446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,128,0,1,fp8,fp8,0,0.2547840078671773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,128,0,1,float16,float16,0,0.23620800177256265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,128,0,1,float16,fp8,0,0.23477333784103394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,128,0,1,fp8,fp8,0,0.22636799017588297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,128,0,1,float16,float16,0,0.2365600069363912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,128,0,1,float16,fp8,0,0.23665066560109457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,128,0,1,fp8,fp8,0,0.22697599728902182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,128,0,1,float16,float16,0,0.2375040054321289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,128,0,1,float16,fp8,0,0.23694399992624918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,128,0,1,fp8,fp8,0,0.22757333517074585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,128,0,1,float16,float16,0,0.2398773431777954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,128,0,1,float16,fp8,0,0.23853866259256998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,128,0,1,fp8,fp8,0,0.23066665728886923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,128,0,1,float16,float16,0,0.13736533125241598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,128,0,1,float16,fp8,0,0.1348426640033722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,128,0,1,fp8,fp8,0,0.13704533378283182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,128,0,1,float16,float16,0,0.12430933117866516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,128,0,1,fp8,fp8,0,0.11735467116038005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,128,0,1,float16,fp8,0,0.1269653340180715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,128,0,1,float16,float16,0,0.12435733278592427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,128,0,1,float16,fp8,0,0.12409599622090657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,128,0,1,fp8,fp8,0,0.1176533301671346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,128,0,1,float16,float16,0,0.12685867150624594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,128,0,1,float16,fp8,0,0.1244533360004425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,128,0,1,fp8,fp8,0,0.1178986628850301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,128,0,1,float16,fp8,0,0.12702932953834534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,128,0,1,float16,float16,0,0.1272053321202596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,128,0,1,float16,float16,0,0.0758240024248759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,128,0,1,fp8,fp8,0,0.12139200170834859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,128,0,1,float16,fp8,0,0.07391466697057088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,128,0,1,fp8,fp8,0,0.0765119989713033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,128,0,1,float16,float16,0,0.07083733379840851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,128,0,1,float16,fp8,0,0.07062933345635732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,128,0,1,fp8,fp8,0,0.06699199974536896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,128,0,1,float16,float16,0,0.0710453341404597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,128,0,1,float16,fp8,0,0.0709386666615804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,128,0,1,fp8,fp8,0,0.06740266581376393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,128,0,1,float16,float16,0,0.07147199908892314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,128,0,1,float16,fp8,0,0.07017600039641063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,128,0,1,fp8,fp8,0,0.06630399823188782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,128,0,1,float16,float16,0,0.07115200161933899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,128,0,1,float16,float16,0,0.04370133578777313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,128,0,1,float16,fp8,0,0.07046400010585785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,128,0,1,float16,fp8,0,0.04363733530044556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,128,0,1,fp8,fp8,0,0.06833066542943318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,128,0,1,float16,float16,0,0.041984001795450844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,128,0,1,fp8,fp8,0,0.04297600189844767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,128,0,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,128,0,1,fp8,fp8,0,0.040634666879971824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,128,0,1,float16,float16,0,0.0429013321797053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,128,0,1,float16,fp8,0,0.041834667325019836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,128,0,1,fp8,fp8,0,0.041663999358812966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,128,0,1,float16,float16,0,0.04337066908677419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,128,0,1,float16,fp8,0,0.04155733436346054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,128,0,1,fp8,fp8,0,0.04032533367474874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,128,0,1,float16,float16,0,0.04371733466784159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,128,0,1,float16,fp8,0,0.041637333730856575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,128,0,1,fp8,fp8,0,0.041306667029857635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,128,0,1,float16,float16,0,0.02956799914439519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,128,0,1,float16,fp8,0,0.029445332785447437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,128,0,1,fp8,fp8,0,0.029578665892283123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,128,0,1,float16,float16,0,0.029264000554879505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,128,0,1,float16,fp8,0,0.027456000447273254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,128,0,1,fp8,fp8,0,0.028959999481836956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,128,0,1,float16,float16,0,0.02735466758410136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,128,0,1,float16,fp8,0,0.029279999434947968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,128,0,1,fp8,fp8,0,0.02934933453798294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,128,0,1,float16,float16,0,0.027765333652496338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,128,0,1,float16,fp8,0,0.029487999776999157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,128,0,1,fp8,fp8,0,0.029157333076000214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,128,0,1,float16,float16,0,0.028010666370391846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,128,0,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,128,0,1,fp8,fp8,0,0.02934933453798294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,128,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,128,0,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,128,0,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,128,0,1,float16,float16,0,0.02163733293612798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,128,0,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,128,0,1,fp8,fp8,0,0.020117333779732387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,128,0,1,float16,float16,0,0.021477334201335907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,128,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,128,0,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,128,0,1,float16,float16,0,0.021359999974568684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,128,0,1,float16,fp8,0,0.020853333175182343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,128,0,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,128,0,1,float16,float16,0,0.021525333325068157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,128,0,1,float16,fp8,0,0.02035733312368393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,128,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,128,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,128,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,128,0,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,128,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,128,0,1,float16,float16,0,0.017477333545684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,128,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,128,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,128,0,1,fp8,fp8,0,0.01889066646496455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,128,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,128,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,128,0,1,fp8,fp8,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,128,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,128,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,128,0,1,fp8,fp8,0,0.01748266691962878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,128,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,128,0,1,float16,float16,0,0.3738986651102702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,128,0,1,float16,fp8,0,0.36987733840942383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,128,0,1,fp8,fp8,0,0.36349332332611084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,128,0,1,float16,fp8,0,0.3715519905090332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,128,0,1,float16,float16,0,0.37381335099538165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,128,0,1,fp8,fp8,0,0.36452265580495197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,128,0,1,float16,float16,0,0.3747520049413045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,128,0,1,fp8,fp8,0,0.36794666449228924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,128,0,1,float16,fp8,0,0.3721760114034017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,128,0,1,float16,float16,0,0.37508801619211835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,128,0,1,float16,fp8,0,0.37695467472076416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,128,0,1,fp8,fp8,0,0.36828800042470294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,128,0,1,float16,float16,0,0.2083573341369629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,128,0,1,float16,fp8,0,0.2077653408050537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,128,0,1,fp8,fp8,0,0.2064853310585022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,128,0,1,float16,fp8,0,0.1981066664059957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,128,0,1,fp8,fp8,0,0.18728532393773398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,128,0,1,float16,float16,0,0.1965226729710897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,128,0,1,float16,float16,0,0.19390400250752768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,128,0,1,float16,fp8,0,0.19591999053955078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,128,0,1,fp8,fp8,0,0.1879253387451172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,128,0,1,float16,fp8,0,0.19463467597961426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,128,0,1,float16,float16,0,0.19772799809773764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,128,0,1,fp8,fp8,0,0.18730133771896362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,128,0,1,float16,float16,0,0.19757866859436035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,128,0,1,float16,fp8,0,0.19871999820073447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,128,0,1,fp8,fp8,0,0.19049600760142008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,128,0,1,float16,float16,0,0.10941333572069804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,128,0,1,float16,fp8,0,0.10980266332626343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,128,0,1,fp8,fp8,0,0.10962133606274922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,128,0,1,float16,float16,0,0.10557333628336589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,128,0,1,float16,fp8,0,0.10599467158317566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,128,0,1,fp8,fp8,0,0.103301336367925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,128,0,1,float16,float16,0,0.10507733623186748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,128,0,1,float16,fp8,0,0.10628267129262288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,128,0,1,fp8,fp8,0,0.10168533523877461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,128,0,1,float16,float16,0,0.1056160032749176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,128,0,1,float16,fp8,0,0.10595732927322388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,128,0,1,fp8,fp8,0,0.10105599959691365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,128,0,1,float16,float16,0,0.10571733117103577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,128,0,1,float16,fp8,0,0.1058240036169688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,128,0,1,fp8,fp8,0,0.10248000423113506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,128,0,1,float16,float16,0,0.06216000020503998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,128,0,1,float16,fp8,0,0.062181333700815834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,128,0,1,fp8,fp8,0,0.06012799839178721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,128,0,1,float16,float16,0,0.06071466704209646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,128,0,1,float16,fp8,0,0.0602453351020813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,128,0,1,fp8,fp8,0,0.05888533095518748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,128,0,1,float16,float16,0,0.060271998246510826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,128,0,1,float16,fp8,0,0.0601440022389094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,128,0,1,fp8,fp8,0,0.05853333572546641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,128,0,1,float16,float16,0,0.059989333152770996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,128,0,1,float16,fp8,0,0.06035733222961426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,128,0,1,fp8,fp8,0,0.05845333139101664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,128,0,1,float16,float16,0,0.06033066908518473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,128,0,1,float16,fp8,0,0.060133333007494606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,128,0,1,float16,float16,0,0.03740799923737844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,128,0,1,fp8,fp8,0,0.05898666878541311
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,128,0,1,float16,fp8,0,0.03754133234421412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,128,0,1,float16,float16,0,0.035775999228159584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,128,0,1,fp8,fp8,0,0.03781333317359289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,128,0,1,float16,fp8,0,0.037578667203585304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,128,0,1,fp8,fp8,0,0.035445332527160645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,128,0,1,float16,float16,0,0.03741333385308584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,128,0,1,float16,fp8,0,0.03772799919048945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,128,0,1,fp8,fp8,0,0.035455999275048576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,128,0,1,float16,float16,0,0.03756800045569738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,128,0,1,float16,fp8,0,0.037621334195137024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,128,0,1,fp8,fp8,0,0.03543466577927271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,128,0,1,float16,float16,0,0.03573333223660787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,128,0,1,float16,fp8,0,0.03749333322048187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,128,0,1,fp8,fp8,0,0.03714666763941447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,128,0,1,float16,float16,0,0.025418666501839954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,128,0,1,float16,fp8,0,0.02554133286078771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,128,0,1,fp8,fp8,0,0.02731200059254964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,128,0,1,float16,float16,0,0.025360000630219776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,128,0,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,128,0,1,float16,float16,0,0.025472000241279602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,128,0,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,128,0,1,float16,fp8,0,0.026335999369621277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,128,0,1,fp8,fp8,0,0.02518933266401291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,128,0,1,float16,float16,0,0.025290665527184803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,128,0,1,float16,fp8,0,0.026165333886941273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,128,0,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,128,0,1,float16,float16,0,0.025242666403452556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,128,0,1,float16,fp8,0,0.025663999219735462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,128,0,1,fp8,fp8,0,0.025290665527184803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,128,0,1,float16,float16,0,0.0201706662774086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,128,0,1,float16,fp8,0,0.019199999670187633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,128,0,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,128,0,1,float16,float16,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,128,0,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,128,0,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,128,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,128,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,128,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,128,0,1,float16,float16,0,0.01942933350801468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,128,0,1,float16,fp8,0,0.021407999098300934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,128,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,128,0,1,float16,float16,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,128,0,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,128,0,1,fp8,fp8,0,0.019343999524911244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,128,0,1,float16,float16,0,0.01616000011563301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,128,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,128,0,1,float16,fp8,0,0.0174346665541331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,128,0,1,float16,fp8,0,0.018378666291634243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,128,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,128,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,128,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,128,0,1,float16,float16,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,128,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,0,0.3112853368123372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,128,0,1,fp8,fp8,0,0.2958773374557495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,0,0.3128426671028137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,0,0.31386133035024005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,0,0.31225599845250446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,128,0,1,fp8,fp8,0,0.29607999324798584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,128,0,1,fp8,fp8,0,0.29595200220743817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,0,0.3131573398907979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,0,0.31331733862559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,0,0.31303467353185016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,0,0.3129439949989319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,128,0,1,fp8,fp8,0,0.2970293362935384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,0,0.1630293329556783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,0,0.16286933422088623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,128,0,1,fp8,fp8,0,0.1567253371079763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,0,0.1628106633822123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,0,0.16123732924461365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,128,0,1,fp8,fp8,0,0.15494400262832642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,0,0.16107733050982156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,0,0.16268799702326456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,128,0,1,fp8,fp8,0,0.1546933352947235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,0,0.16114133596420288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,0,0.16078399618466696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,128,0,1,fp8,fp8,0,0.15517866611480713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,0,0.16114133596420288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,0,0.16310399770736694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,0,0.08719999591509502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,0,0.08917333682378133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,128,0,1,fp8,fp8,0,0.1548426647981008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,128,0,1,fp8,fp8,0,0.08495466907819112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,0,0.08875733613967896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,0,0.08935466408729553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,128,0,1,fp8,fp8,0,0.0849120020866394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,0,0.0888320008913676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,0,0.09000533819198608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,128,0,1,fp8,fp8,0,0.08469333251317342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,0,0.0897866686185201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,0,0.08917333682378133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,128,0,1,fp8,fp8,0,0.08500267068545024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,0,0.08712533116340637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,0,0.08879466851552327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,128,0,1,fp8,fp8,0,0.0848426620165507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,0,0.05203199883302053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,0,0.052239999175071716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,128,0,1,fp8,fp8,0,0.05000533163547516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,0,0.052416001756985985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,0,0.05231466889381409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,128,0,1,fp8,fp8,0,0.049770668148994446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,0,0.05207466582457224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,0,0.05247466762860616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,128,0,1,fp8,fp8,0,0.05009066561857859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,0,0.05218133330345154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,0,0.05338666836420695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,128,0,1,fp8,fp8,0,0.05017066498597463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,0,0.05194133520126343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,0,0.0518453319867452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,128,0,1,fp8,fp8,0,0.04997866849104563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,0,0.03201066702604294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,0,0.033488000432650246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,128,0,1,fp8,fp8,0,0.03329066683848699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,0,0.03346133232116699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,0,0.03321066747109095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,128,0,1,fp8,fp8,0,0.03340800106525421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,0,0.033333333830038704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,0,0.033226666351159416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,128,0,1,fp8,fp8,0,0.033701332906881966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,0,0.0336053321758906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,128,0,1,fp8,fp8,0,0.033370666205883026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,0,0.035232000052928925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,0,0.03401066611210505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,0,0.03365866591533025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,128,0,1,fp8,fp8,0,0.03372266640265783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,0,0.023365333676338196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,128,0,1,fp8,fp8,0,0.02566933383544286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,0,0.024661332368850708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,0,0.025381334125995636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,0,0.02508266766866048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,128,0,1,fp8,fp8,0,0.02515733242034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,0,0.02533866713444392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,128,0,1,fp8,fp8,0,0.025402667621771496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,0,0.024901332954565685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,0,0.025466665625572205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,128,0,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,0,0.024266667664051056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,128,0,1,fp8,fp8,0,0.02533866713444392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,0,0.020629333953062694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,128,0,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,0,0.021269333859284718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,128,0,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,0,0.02107200026512146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,128,0,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,0,0.020810666183630627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,128,0,1,fp8,fp8,0,0.019487999379634857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,0,0.019920000185569126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,0,0.020096000283956528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,128,0,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,0,0.01657066618402799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,128,0,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,128,0,1,fp8,fp8,0,0.01868266612291336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,0,0.015642666568358738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,128,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,128,0,1,fp8,fp8,0,19.963722229003906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,128,0,1,float16,fp8,0,32.41769663492838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,128,0,1,float16,float16,0,33.84020741780599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,128,0,1,float16,float16,0,35.269083658854164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,128,0,1,fp8,fp8,0,19.05406951904297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,128,0,1,float16,fp8,0,33.28827667236328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,128,0,1,float16,float16,0,33.21340688069662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,128,0,1,fp8,fp8,0,19.739407857259113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,128,0,1,float16,float16,0,17.541941324869793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,128,0,1,float16,fp8,0,34.254740397135414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,128,0,1,fp8,fp8,0,10.244623819986979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,128,0,1,float16,fp8,0,16.859434763590496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,128,0,1,float16,float16,0,17.168516794840496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,128,0,1,fp8,fp8,0,9.51416015625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,128,0,1,float16,fp8,0,17.087034861246746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,128,0,1,float16,float16,0,16.17513656616211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,128,0,1,fp8,fp8,0,9.262869517008463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,128,0,1,float16,fp8,0,17.115428924560547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,128,0,1,float16,float16,0,17.170698801676433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,128,0,1,fp8,fp8,0,9.828863779703775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,128,0,1,float16,fp8,0,16.475072224934895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,128,0,1,float16,float16,0,8.883850733439127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,128,0,1,fp8,fp8,0,4.959013303120931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,128,0,1,float16,fp8,0,8.552410761515299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,128,0,1,fp8,fp8,0,4.694213231404622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,128,0,1,float16,float16,0,8.186138788859049
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,128,0,1,float16,fp8,0,8.357952117919922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,128,0,1,float16,float16,0,8.485589345296225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,128,0,1,fp8,fp8,0,4.988458633422852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,128,0,1,float16,fp8,0,7.961450576782227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,128,0,1,float16,float16,0,8.587322870890299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,128,0,1,fp8,fp8,0,5.119941393534343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,128,0,1,float16,float16,0,3.9142719904581704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,128,0,1,float16,fp8,0,8.655237197875977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,128,0,1,fp8,fp8,0,2.4612533251444497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,128,0,1,float16,fp8,0,4.125402768452962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,128,0,1,float16,float16,0,3.300367991129557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,128,0,1,float16,fp8,0,3.8772427241007485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,128,0,1,fp8,fp8,0,2.379312038421631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,128,0,1,float16,fp8,0,3.767226537068685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,128,0,1,float16,float16,0,4.236543973286946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,128,0,1,fp8,fp8,0,2.50819730758667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,128,0,1,float16,float16,0,4.076709429423015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,128,0,1,fp8,fp8,0,2.5935734113057456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,128,0,1,float16,fp8,0,3.805562655131022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,128,0,1,fp8,fp8,0,12.277540842692057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,128,0,1,float16,float16,0,20.304410298665363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,128,0,1,float16,fp8,0,19.950016021728516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,128,0,1,float16,float16,0,18.223077138264973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,128,0,1,fp8,fp8,0,12.539087931315104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,128,0,1,float16,fp8,0,19.84716796875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,128,0,1,float16,float16,0,18.25320561726888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,128,0,1,float16,fp8,0,18.788623809814453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,128,0,1,float16,float16,0,10.284741083780924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,128,0,1,fp8,fp8,0,12.852352142333984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,128,0,1,fp8,fp8,0,5.988122940063477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,128,0,1,float16,fp8,0,9.753007888793945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,128,0,1,fp8,fp8,0,5.650901158650716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,128,0,1,float16,float16,0,10.053194681803385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,128,0,1,float16,fp8,0,10.420997619628906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,128,0,1,float16,float16,0,9.745370864868164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,128,0,1,float16,fp8,0,9.229461034138998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,128,0,1,fp8,fp8,0,6.021413167317708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,128,0,1,float16,float16,0,10.060538609822592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,128,0,1,float16,float16,0,5.689605077107747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,128,0,1,fp8,fp8,0,5.7364959716796875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,128,0,1,fp8,fp8,0,3.2371253967285156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,128,0,1,float16,fp8,0,9.761226654052734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,128,0,1,float16,fp8,0,5.547807693481445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,128,0,1,float16,float16,0,4.193301200866699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,128,0,1,fp8,fp8,0,2.7050612767537436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,128,0,1,float16,fp8,0,5.117007891337077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,128,0,1,float16,float16,0,4.664442698160808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,128,0,1,float16,fp8,0,4.6653439203898115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,128,0,1,fp8,fp8,0,3.0632108052571616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,128,0,1,float16,float16,0,4.207845369974772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,128,0,1,fp8,fp8,0,3.1307201385498047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,128,0,1,float16,float16,0,2.334400018056234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,128,0,1,float16,fp8,0,4.781989415486653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,128,0,1,float16,fp8,0,2.2265440622965493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,128,0,1,fp8,fp8,0,1.5328532854715984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,128,0,1,float16,float16,0,1.9715894063313801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,128,0,1,float16,fp8,0,2.0988160769144693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,128,0,1,fp8,fp8,0,1.473189353942871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,128,0,1,float16,float16,0,1.700501283009847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,128,0,1,float16,fp8,0,1.847823937733968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,128,0,1,fp8,fp8,0,1.474405288696289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,128,0,1,float16,float16,0,1.831925392150879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,128,0,1,float16,fp8,0,1.7824959754943848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,128,0,1,fp8,fp8,0,1.4785866737365723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,128,0,1,fp8,fp8,0,8.608389536539713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,128,0,1,float16,fp8,0,12.559322357177734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,128,0,1,float16,float16,0,13.153306325276693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,128,0,1,float16,float16,0,13.510037740071615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,128,0,1,fp8,fp8,0,7.981274922688802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,128,0,1,float16,fp8,0,13.994091033935547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,128,0,1,float16,float16,0,13.360026041666666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,128,0,1,float16,fp8,0,14.404341379801432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,128,0,1,fp8,fp8,0,7.8900801340738935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,128,0,1,float16,float16,0,7.449466705322266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,128,0,1,fp8,fp8,0,4.5895945231119795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,128,0,1,float16,fp8,0,7.370197296142578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,128,0,1,float16,float16,0,6.42574946085612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,128,0,1,fp8,fp8,0,4.215322812398274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,128,0,1,float16,fp8,0,6.825632095336914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,128,0,1,float16,float16,0,6.705039978027344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,128,0,1,float16,fp8,0,6.675919850667317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,128,0,1,fp8,fp8,0,4.237925211588542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,128,0,1,float16,float16,0,7.235151926676433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,128,0,1,fp8,fp8,0,4.137040138244629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,128,0,1,float16,float16,0,3.6895840962727866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,128,0,1,float16,fp8,0,6.919488271077474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,128,0,1,fp8,fp8,0,2.0465920766194663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,128,0,1,float16,fp8,0,3.4877281188964844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,128,0,1,float16,float16,0,3.387807846069336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,128,0,1,fp8,fp8,0,1.9572265942891438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,128,0,1,float16,fp8,0,3.44377072652181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,128,0,1,float16,float16,0,3.474773406982422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,128,0,1,float16,fp8,0,3.5945866902669272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,128,0,1,fp8,fp8,0,1.958133379618327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,128,0,1,float16,float16,0,3.448458671569824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,128,0,1,float16,float16,0,1.275429328282674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,128,0,1,fp8,fp8,0,2.105109373728434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,128,0,1,fp8,fp8,0,1.3041280110677083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,128,0,1,float16,fp8,0,3.6242507298787436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,128,0,1,float16,fp8,0,1.7248586018880208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,128,0,1,float16,float16,0,1.5087092717488606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,128,0,1,float16,fp8,0,1.2486613591512044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,128,0,1,fp8,fp8,0,1.1040053367614746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,128,0,1,float16,float16,0,1.5026613871256511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,128,0,1,float16,fp8,0,1.3452159563700359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,128,0,1,fp8,fp8,0,1.1305493513743083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,128,0,1,float16,float16,0,1.2650453249613445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,128,0,1,float16,fp8,0,1.8137386639912922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,128,0,1,fp8,fp8,0,1.1054399808247883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,128,0,1,fp8,fp8,0,11.368330637613932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,128,0,1,float16,float16,0,16.77632013956706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,128,0,1,float16,fp8,0,18.26742426554362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,128,0,1,float16,float16,0,19.43611780802409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,128,0,1,fp8,fp8,0,11.028271993001303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,128,0,1,float16,fp8,0,18.886107126871746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,128,0,1,float16,float16,0,19.805296579996746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,128,0,1,fp8,fp8,0,10.357893625895182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,128,0,1,float16,fp8,0,18.246144612630207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,128,0,1,float16,float16,0,9.197530746459961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,128,0,1,fp8,fp8,0,5.858773549397786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,128,0,1,float16,fp8,0,9.979520161946615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,128,0,1,float16,float16,0,9.473087946573893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,128,0,1,float16,fp8,0,8.950735727945963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,128,0,1,fp8,fp8,0,5.213066736857097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,128,0,1,fp8,fp8,0,5.530656178792317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,128,0,1,float16,float16,0,9.519893646240234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,128,0,1,float16,float16,0,8.662581125895182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,128,0,1,float16,fp8,0,9.51030413309733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,128,0,1,float16,float16,0,4.639493306477864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,128,0,1,fp8,fp8,0,5.92091178894043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,128,0,1,float16,fp8,0,5.041877428690593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,128,0,1,float16,fp8,0,9.40059725443522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,128,0,1,fp8,fp8,0,2.6962401072184243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,128,0,1,float16,float16,0,4.186992009480794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,128,0,1,float16,fp8,0,4.783520062764485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,128,0,1,fp8,fp8,0,2.6248265902201333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,128,0,1,float16,float16,0,4.329957326253255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,128,0,1,fp8,fp8,0,2.638538678487142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,128,0,1,float16,float16,0,4.631135940551758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,128,0,1,float16,fp8,0,4.4681440989176435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,128,0,1,fp8,fp8,0,2.668426513671875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,128,0,1,float16,float16,0,2.257861296335856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,128,0,1,float16,fp8,0,2.194917360941569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,128,0,1,float16,fp8,0,4.251248041788737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,128,0,1,fp8,fp8,0,1.5564586321512859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,128,0,1,float16,fp8,0,1.7279946009318035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,128,0,1,float16,float16,0,1.9954773585001628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,128,0,1,fp8,fp8,0,1.3552746772766113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,128,0,1,float16,float16,0,1.875637372334798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,128,0,1,float16,fp8,0,1.63264004389445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,128,0,1,fp8,fp8,0,1.3599732716878254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,128,0,1,float16,float16,0,1.5543999671936035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,128,0,1,fp8,fp8,0,1.3633653322855632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,128,0,1,float16,fp8,0,1.9495466550191243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,128,0,1,float16,float16,0,1.035429318745931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,128,0,1,float16,fp8,0,0.905519962310791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,128,0,1,fp8,fp8,0,0.8180800278981527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,128,0,1,float16,float16,0,0.8789707024892172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,128,0,1,float16,fp8,0,0.8888373374938965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,128,0,1,fp8,fp8,0,0.7800693511962891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,128,0,1,float16,float16,0,0.8790559768676758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,128,0,1,float16,fp8,0,0.8813599745432535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,128,0,1,fp8,fp8,0,0.7878666718800863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,128,0,1,float16,float16,0,0.8783733050028483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,128,0,1,float16,fp8,0,0.8895520369211832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,128,0,1,fp8,fp8,0,0.8148693243662516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,128,0,1,fp8,fp8,0,6.446250915527344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,128,0,1,float16,float16,0,10.133487701416016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,128,0,1,float16,float16,0,9.96185048421224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,128,0,1,float16,fp8,0,10.758949279785156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,128,0,1,float16,fp8,0,9.96125348409017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,128,0,1,fp8,fp8,0,6.7603200276692705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,128,0,1,float16,float16,0,10.378997166951498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,128,0,1,float16,fp8,0,11.549023946126303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,128,0,1,float16,float16,0,6.317471822102864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,128,0,1,fp8,fp8,0,7.347279866536458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,128,0,1,float16,fp8,0,5.518890380859375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,128,0,1,fp8,fp8,0,3.7467625935872397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,128,0,1,float16,float16,0,5.554922739664714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,128,0,1,float16,fp8,0,5.283285458882649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,128,0,1,fp8,fp8,0,3.0164639155069985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,128,0,1,float16,float16,0,5.420757293701172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,128,0,1,fp8,fp8,0,3.36077880859375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,128,0,1,float16,fp8,0,5.629487991333008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,128,0,1,float16,float16,0,5.090565363566081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,128,0,1,fp8,fp8,0,3.389024098714193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,128,0,1,float16,fp8,0,5.194277445475261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,128,0,1,float16,float16,0,2.988784154256185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,128,0,1,float16,fp8,0,2.273418744405111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,128,0,1,fp8,fp8,0,1.70087464650472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,128,0,1,float16,float16,0,2.0371626218159995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,128,0,1,fp8,fp8,0,1.5755467414855957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,128,0,1,float16,fp8,0,2.5107572873433432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,128,0,1,float16,float16,0,2.3619093894958496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,128,0,1,float16,fp8,0,1.8087679545084636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,128,0,1,fp8,fp8,0,1.7153600056966145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,128,0,1,float16,float16,0,2.561482588450114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,128,0,1,float16,float16,0,1.0462346871693928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,128,0,1,fp8,fp8,0,1.6583466529846191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,128,0,1,float16,fp8,0,2.2274133364359536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,128,0,1,float16,fp8,0,1.271125316619873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,128,0,1,fp8,fp8,0,1.2530986467997234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,128,0,1,float16,float16,0,0.9827626546223959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,128,0,1,float16,fp8,0,1.1421813170115154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,128,0,1,fp8,fp8,0,1.1704800128936768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,128,0,1,float16,float16,0,0.9926613171895345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,128,0,1,float16,fp8,0,0.9855999946594238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,128,0,1,fp8,fp8,0,1.0395519733428955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,128,0,1,float16,float16,0,0.9920960267384847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,128,0,1,float16,fp8,0,1.2093013127644856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,128,0,1,fp8,fp8,0,0.8746506373087565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,128,0,1,float16,float16,0,0.5896960099538168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,128,0,1,float16,fp8,0,0.6003093322118124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,128,0,1,float16,float16,0,0.5720533529917399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,128,0,1,fp8,fp8,0,0.7220693429311117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,128,0,1,float16,fp8,0,0.569973349571228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,128,0,1,fp8,fp8,0,0.5116906563440958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,128,0,1,float16,float16,0,0.5748106638590494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,128,0,1,float16,fp8,0,0.5734879970550537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,128,0,1,fp8,fp8,0,0.513205329577128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,128,0,1,float16,float16,0,0.577461322148641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,128,0,1,float16,fp8,0,0.5789759953816732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,128,0,1,fp8,fp8,0,0.5166879892349243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,128,0,1,fp8,fp8,0,6.111610412597656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,128,0,1,float16,float16,0,9.664597193400065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,128,0,1,float16,fp8,0,9.847647984822592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,128,0,1,float16,float16,0,10.56943448384603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,128,0,1,float16,fp8,0,9.438346862792969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,128,0,1,fp8,fp8,0,6.442975997924805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,128,0,1,float16,float16,0,10.461423873901367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,128,0,1,float16,fp8,0,10.78872044881185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,128,0,1,float16,float16,0,5.608575820922852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,128,0,1,fp8,fp8,0,6.44218635559082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,128,0,1,fp8,fp8,0,3.9104960759480796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,128,0,1,float16,float16,0,5.053525288899739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,128,0,1,float16,fp8,0,6.14187749226888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,128,0,1,float16,fp8,0,3.9970134099324546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,128,0,1,fp8,fp8,0,3.228288014729818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,128,0,1,fp8,fp8,0,3.0692052841186523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,128,0,1,float16,float16,0,5.045407931009929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,128,0,1,float16,fp8,0,5.304197311401367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,128,0,1,float16,float16,0,5.695632298787435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,128,0,1,float16,float16,0,2.6200159390767417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,128,0,1,fp8,fp8,0,3.4625705083211265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,128,0,1,float16,fp8,0,4.423685391743978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,128,0,1,float16,fp8,0,2.654143969217936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,128,0,1,fp8,fp8,0,1.7420053482055664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,128,0,1,float16,float16,0,1.9019254048665364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,128,0,1,fp8,fp8,0,1.5779679616292317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,128,0,1,float16,fp8,0,2.140725294748942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,128,0,1,float16,float16,0,1.9537547429402669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,128,0,1,float16,fp8,0,1.888543923695882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,128,0,1,fp8,fp8,0,1.5956586201985676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,128,0,1,float16,float16,0,2.137450695037842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,128,0,1,float16,float16,0,1.0262880325317383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,128,0,1,float16,fp8,0,2.1336213747660318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,128,0,1,fp8,fp8,0,1.5985546112060547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,128,0,1,float16,fp8,0,1.245146671930949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,128,0,1,fp8,fp8,0,1.0516213575998943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,128,0,1,float16,fp8,0,0.9595786730448405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,128,0,1,float16,float16,0,1.264463980992635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,128,0,1,fp8,fp8,0,0.8868052959442139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,128,0,1,float16,float16,0,0.9669546286265055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,128,0,1,float16,fp8,0,0.9885813395182291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,128,0,1,float16,float16,0,0.9626826445261637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,128,0,1,fp8,fp8,0,1.0916533470153809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,128,0,1,float16,fp8,0,0.9791839917500814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,128,0,1,float16,float16,0,0.5667200088500977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,128,0,1,float16,fp8,0,0.5743360122044882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,128,0,1,fp8,fp8,0,1.0950346787770588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,128,0,1,fp8,fp8,0,0.5207093159357706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,128,0,1,float16,float16,0,0.5640159845352173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,128,0,1,float16,fp8,0,0.5381226539611816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,128,0,1,fp8,fp8,0,0.476149320602417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,128,0,1,float16,float16,0,0.542954683303833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,128,0,1,fp8,fp8,0,0.48026665051778156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,128,0,1,float16,fp8,0,0.5403306484222412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,128,0,1,float16,float16,0,0.5375893513361613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,128,0,1,float16,float16,0,0.3399146795272827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,128,0,1,float16,fp8,0,0.5452959934870402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,128,0,1,fp8,fp8,0,0.4805973370869954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,128,0,1,float16,fp8,0,0.339957316716512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,128,0,1,fp8,fp8,0,0.31220799684524536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,128,0,1,float16,fp8,0,0.3187040090560913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,128,0,1,float16,float16,0,0.3180053234100342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,128,0,1,fp8,fp8,0,0.29053332408269245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,128,0,1,float16,float16,0,0.3205653429031372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,128,0,1,fp8,fp8,0,0.2940373420715332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,128,0,1,float16,fp8,0,0.32156266768773395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,128,0,1,float16,float16,0,0.3210453391075134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,128,0,1,float16,fp8,0,0.3253546754519145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,128,0,1,fp8,fp8,0,0.2964426676432292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,128,0,1,fp8,fp8,0,3.9012533823649087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,128,0,1,float16,float16,0,5.800629297892253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,128,0,1,float16,fp8,0,5.960389455159505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,128,0,1,float16,float16,0,6.15443738301595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,128,0,1,float16,fp8,0,5.589162826538086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,128,0,1,fp8,fp8,0,3.9161065419514975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,128,0,1,float16,fp8,0,6.136122385660808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,128,0,1,float16,float16,0,6.281717300415039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,128,0,1,float16,float16,0,3.001200040181478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,128,0,1,fp8,fp8,0,3.964874585469564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,128,0,1,fp8,fp8,0,2.2026987075805664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,128,0,1,float16,fp8,0,3.0502827962239585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,128,0,1,float16,float16,0,2.69156805674235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,128,0,1,float16,fp8,0,2.232752005259196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,128,0,1,fp8,fp8,0,1.970597267150879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,128,0,1,float16,float16,0,2.9206453959147134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,128,0,1,float16,fp8,0,2.841813405354818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,128,0,1,fp8,fp8,0,1.9856160481770833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,128,0,1,float16,float16,0,3.1085599263509116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,128,0,1,float16,float16,0,1.2654613653818767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,128,0,1,float16,fp8,0,3.0818611780802407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,128,0,1,fp8,fp8,0,2.0009120305379233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,128,0,1,float16,fp8,0,1.7416319847106934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,128,0,1,fp8,fp8,0,1.3503200213114421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,128,0,1,float16,fp8,0,1.1706079641977947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,128,0,1,float16,float16,0,1.4032853444417317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,128,0,1,fp8,fp8,0,1.5424853960673015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,128,0,1,float16,float16,0,1.1741759777069092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,128,0,1,float16,fp8,0,1.2822773456573486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,128,0,1,fp8,fp8,0,1.1361707051595051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,128,0,1,float16,float16,0,1.1849653720855713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,128,0,1,float16,fp8,0,1.215173323949178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,128,0,1,float16,float16,0,0.6793440183003744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,128,0,1,fp8,fp8,0,1.0472906430562336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,128,0,1,fp8,fp8,0,0.7287840048472086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,128,0,1,float16,fp8,0,0.8892266750335693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,128,0,1,float16,float16,0,0.6484106779098511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,128,0,1,float16,fp8,0,0.6323626836140951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,128,0,1,fp8,fp8,0,0.5620106856028239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,128,0,1,float16,float16,0,0.6301546494166056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,128,0,1,float16,fp8,0,0.6408053239186605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,128,0,1,fp8,fp8,0,0.5633813142776489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,128,0,1,float16,float16,0,0.6348106861114502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,128,0,1,float16,fp8,0,0.7434720198313395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,128,0,1,fp8,fp8,0,0.5754133462905884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,128,0,1,float16,float16,0,0.3797653516133626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,128,0,1,float16,fp8,0,0.3885973294576009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,128,0,1,fp8,fp8,0,0.3522080183029175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,128,0,1,float16,float16,0,0.3588159879048665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,128,0,1,float16,fp8,0,0.3567733367284139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,128,0,1,fp8,fp8,0,0.32391999165217084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,128,0,1,float16,float16,0,0.3574560085932414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,128,0,1,float16,fp8,0,0.360703984896342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,128,0,1,fp8,fp8,0,0.32494932413101196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,128,0,1,fp8,fp8,0,0.32627199093500775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,128,0,1,float16,float16,0,0.3600800037384033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,128,0,1,float16,fp8,0,0.36353600025177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,128,0,1,float16,float16,0,0.2325813372929891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,128,0,1,fp8,fp8,0,0.22047466039657593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,128,0,1,float16,fp8,0,0.23812800645828247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,128,0,1,float16,float16,0,0.21458667516708374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,128,0,1,fp8,fp8,0,0.19818667570749918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,128,0,1,float16,fp8,0,0.21810134251912436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,128,0,1,float16,float16,0,0.21612266699473062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,128,0,1,float16,fp8,0,0.21847466627756754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,128,0,1,fp8,fp8,0,0.20214933156967163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,128,0,1,float16,float16,0,0.21811199188232422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,128,0,1,fp8,fp8,0,0.20572266976038614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,128,0,1,float16,fp8,0,0.21937066316604614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,128,0,1,fp8,fp8,0,4.319434801737468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,128,0,1,float16,float16,0,6.006981531778972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,128,0,1,float16,fp8,0,6.091391881306966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,128,0,1,float16,fp8,0,5.494709650675456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,128,0,1,fp8,fp8,0,4.342026710510254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,128,0,1,float16,float16,0,6.3538773854573565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,128,0,1,float16,float16,0,6.340368270874023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,128,0,1,float16,float16,0,3.5382614135742188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,128,0,1,fp8,fp8,0,4.405189196268718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,128,0,1,float16,fp8,0,3.4657920201619468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,128,0,1,float16,fp8,0,5.884447733561198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,128,0,1,fp8,fp8,0,2.459994633992513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,128,0,1,float16,float16,0,2.824336051940918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,128,0,1,float16,fp8,0,2.821610768636068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,128,0,1,fp8,fp8,0,2.155205408732096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,128,0,1,float16,float16,0,2.710810661315918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,128,0,1,fp8,fp8,0,2.1699253718058267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,128,0,1,float16,fp8,0,2.8531999588012695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,128,0,1,float16,float16,0,2.4761172930399575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,128,0,1,float16,fp8,0,3.1950772603352866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,128,0,1,float16,fp8,0,1.4081066449483235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,128,0,1,float16,float16,0,1.7265332539876301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,128,0,1,fp8,fp8,0,2.224208037058512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,128,0,1,fp8,fp8,0,1.1096853415171306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,128,0,1,float16,float16,0,1.2481919924418132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,128,0,1,float16,fp8,0,1.258522669474284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,128,0,1,fp8,fp8,0,1.591034730275472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,128,0,1,fp8,fp8,0,1.1139520009358723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,128,0,1,float16,fp8,0,1.2625280221303303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,128,0,1,float16,float16,0,1.2666827042897542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,128,0,1,float16,float16,0,1.2645973364512126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,128,0,1,float16,float16,0,0.7944053014119467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,128,0,1,float16,fp8,0,0.7399733066558838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,128,0,1,float16,fp8,0,1.27785062789917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,128,0,1,fp8,fp8,0,1.2028746604919434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,128,0,1,fp8,fp8,0,0.8484053611755371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,128,0,1,float16,float16,0,0.7976586818695068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,128,0,1,fp8,fp8,0,0.5886079867680868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,128,0,1,float16,fp8,0,0.6638079881668091
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,128,0,1,float16,float16,0,0.6678240299224854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,128,0,1,fp8,fp8,0,0.5931466817855835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,128,0,1,float16,float16,0,0.6702293554941813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,128,0,1,float16,fp8,0,0.7848693529764811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,128,0,1,float16,float16,0,0.39396266142527264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,128,0,1,fp8,fp8,0,0.5992853244145712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,128,0,1,float16,fp8,0,0.47277331352233887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,128,0,1,float16,fp8,0,0.6795093218485514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,128,0,1,fp8,fp8,0,0.36376531918843585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,128,0,1,fp8,fp8,0,0.325658659140269
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,128,0,1,float16,float16,0,0.36192532380421955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,128,0,1,float16,fp8,0,0.4281280040740967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,128,0,1,float16,float16,0,0.36235201358795166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,128,0,1,fp8,fp8,0,0.32866134246190387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,128,0,1,float16,fp8,0,0.36472535133361816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,128,0,1,float16,float16,0,0.36580801010131836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,128,0,1,float16,fp8,0,0.3678613503774007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,128,0,1,float16,float16,0,0.2318720022837321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,128,0,1,float16,fp8,0,0.23536000649134317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,128,0,1,fp8,fp8,0,0.3317013382911682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,128,0,1,float16,float16,0,0.2077173391977946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,128,0,1,fp8,fp8,0,0.2136746644973755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,128,0,1,float16,fp8,0,0.20588266849517822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,128,0,1,fp8,fp8,0,0.19339199860890707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,128,0,1,float16,float16,0,0.2066239913304647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,128,0,1,float16,fp8,0,0.20728000005086264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,128,0,1,fp8,fp8,0,0.19488000869750977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,128,0,1,float16,float16,0,0.21182932456334433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,128,0,1,float16,float16,0,0.14547733465830484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,128,0,1,fp8,fp8,0,0.1962560017903646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,128,0,1,float16,fp8,0,0.14566399653752646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,128,0,1,float16,fp8,0,0.21397866805394491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,128,0,1,fp8,fp8,0,0.13826666275660196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,128,0,1,float16,float16,0,0.13312533497810364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,128,0,1,float16,fp8,0,0.13337600231170654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,128,0,1,fp8,fp8,0,0.12602667013804117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,128,0,1,float16,float16,0,0.13361066579818726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,128,0,1,float16,fp8,0,0.13361600041389465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,128,0,1,fp8,fp8,0,0.12553067008654276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,128,0,1,float16,float16,0,0.1335040032863617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,128,0,1,float16,fp8,0,0.1341653366883596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,128,0,1,fp8,fp8,0,0.12590932846069336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,128,0,1,fp8,fp8,0,2.91428279876709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,128,0,1,float16,float16,0,3.2674026489257812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,128,0,1,float16,fp8,0,3.5267680486043296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,128,0,1,float16,float16,0,3.4707358678181968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,128,0,1,fp8,fp8,0,2.931503931681315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,128,0,1,float16,fp8,0,3.2867838541666665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,128,0,1,float16,float16,0,3.7117865880330405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,128,0,1,float16,float16,0,1.857594648996989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,128,0,1,fp8,fp8,0,2.9640639623006186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,128,0,1,float16,fp8,0,3.699061393737793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,128,0,1,float16,fp8,0,1.9375252723693848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,128,0,1,fp8,fp8,0,1.6937707265218098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,128,0,1,float16,fp8,0,1.6419787406921387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,128,0,1,float16,float16,0,1.6488213539123535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,128,0,1,fp8,fp8,0,1.4592639605204265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,128,0,1,float16,float16,0,1.652938683827718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,128,0,1,fp8,fp8,0,1.473690668741862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,128,0,1,float16,fp8,0,1.6609279314676921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,128,0,1,float16,float16,0,1.6663573582967122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,128,0,1,float16,float16,0,0.9537493387858073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,128,0,1,float16,fp8,0,1.6770079930623372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,128,0,1,fp8,fp8,0,1.4917386372884114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,128,0,1,fp8,fp8,0,0.8740906715393066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,128,0,1,float16,fp8,0,0.9694506327311198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,128,0,1,float16,float16,0,0.8628533681233724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,128,0,1,float16,fp8,0,0.8594133059183756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,128,0,1,fp8,fp8,0,0.7562506993611654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,128,0,1,float16,float16,0,0.8581439654032389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,128,0,1,float16,fp8,0,0.860853354136149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,128,0,1,fp8,fp8,0,0.761840025583903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,128,0,1,float16,float16,0,0.5055253505706787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,128,0,1,float16,float16,0,0.8619466622670492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,128,0,1,float16,fp8,0,0.8694453239440918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,128,0,1,fp8,fp8,0,0.7727146943410238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,128,0,1,float16,fp8,0,0.5183306535085043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,128,0,1,float16,float16,0,0.4527519941329956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,128,0,1,fp8,fp8,0,0.4983146588007609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,128,0,1,float16,fp8,0,0.4569919904073079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,128,0,1,fp8,fp8,0,0.4060106674830119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,128,0,1,float16,float16,0,0.4538293282190959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,128,0,1,fp8,fp8,0,0.4090506633122762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,128,0,1,float16,fp8,0,0.4578506549199422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,128,0,1,float16,float16,0,0.27530133724212646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,128,0,1,float16,float16,0,0.4610186815261841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,128,0,1,fp8,fp8,0,0.41341865062713623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,128,0,1,float16,fp8,0,0.46247466405232746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,128,0,1,fp8,fp8,0,0.2586453358332316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,128,0,1,float16,fp8,0,0.28407466411590576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,128,0,1,float16,float16,0,0.2487199902534485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,128,0,1,float16,fp8,0,0.24846933285395303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,128,0,1,fp8,fp8,0,0.22829866409301758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,128,0,1,float16,float16,0,0.25153066714604694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,128,0,1,fp8,fp8,0,0.22793066501617432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,128,0,1,float16,fp8,0,0.2521439989407857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,128,0,1,float16,float16,0,0.16501333316167197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,128,0,1,float16,float16,0,0.2550080021222432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,128,0,1,float16,fp8,0,0.2543519934018453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,128,0,1,fp8,fp8,0,0.2325013279914856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,128,0,1,float16,fp8,0,0.16805332899093628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,128,0,1,fp8,fp8,0,0.1554080049196879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,128,0,1,float16,float16,0,0.1423306663831075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,128,0,1,float16,fp8,0,0.14386133352915445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,128,0,1,fp8,fp8,0,0.13289599617322287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,128,0,1,float16,float16,0,0.14501333236694336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,128,0,1,fp8,fp8,0,0.13462932904561362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,128,0,1,float16,fp8,0,0.14479466279347739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,128,0,1,float16,float16,0,0.14787200093269348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,128,0,1,float16,float16,0,0.10204799969991048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,128,0,1,float16,fp8,0,0.14779200156529745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,128,0,1,fp8,fp8,0,0.138565331697464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,128,0,1,float16,fp8,0,0.10312533378601074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,128,0,1,fp8,fp8,0,0.10142933328946431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,128,0,1,float16,float16,0,0.09736532966295879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,128,0,1,float16,fp8,0,0.09710400303204854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,128,0,1,fp8,fp8,0,0.09275199969609578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,128,0,1,float16,float16,0,0.09719467163085938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,128,0,1,float16,fp8,0,0.09675199786822002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,128,0,1,fp8,fp8,0,0.093231995900472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,128,0,1,float16,float16,0,0.09659199913342793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,128,0,1,fp8,fp8,0,0.0929813285668691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,128,0,1,float16,fp8,0,0.0979146659374237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,128,0,1,float16,float16,0,3.7289066314697266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,128,0,1,fp8,fp8,0,3.5639785130818686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,128,0,1,float16,fp8,0,3.6097545623779297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,128,0,1,float16,float16,0,4.097029368082683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,128,0,1,fp8,fp8,0,3.6093918482462564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,128,0,1,float16,fp8,0,3.8385388056437173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,128,0,1,float16,float16,0,3.733706792195638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,128,0,1,float16,fp8,0,4.046079953511556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,128,0,1,float16,float16,0,2.1518026987711587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,128,0,1,float16,fp8,0,2.1471254030863443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,128,0,1,fp8,fp8,0,2.0082720120747886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,128,0,1,fp8,fp8,0,3.6348533630371094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,128,0,1,float16,float16,0,1.7638079325358074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,128,0,1,float16,fp8,0,1.7671945889790852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,128,0,1,fp8,fp8,0,1.6828853289286296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,128,0,1,float16,float16,0,1.7601067225138347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,128,0,1,fp8,fp8,0,1.7039252916971843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,128,0,1,float16,fp8,0,1.900762716929118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,128,0,1,float16,float16,0,1.789258639017741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,128,0,1,float16,float16,0,1.029968023300171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,128,0,1,float16,fp8,0,1.804144064585368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,128,0,1,float16,fp8,0,1.0178453127543132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,128,0,1,fp8,fp8,0,1.7966666221618652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,128,0,1,float16,float16,0,0.8898613452911377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,128,0,1,fp8,fp8,0,1.006815989812215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,128,0,1,float16,fp8,0,0.8940693537394205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,128,0,1,fp8,fp8,0,0.8313439687093099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,128,0,1,float16,float16,0,0.8956800301869711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,128,0,1,float16,fp8,0,0.8987200260162354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,128,0,1,fp8,fp8,0,0.8362346490224203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,128,0,1,float16,float16,0,0.9127733707427979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,128,0,1,float16,fp8,0,0.9104693730672201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,128,0,1,fp8,fp8,0,0.875269333521525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,128,0,1,float16,float16,0,0.5296266476313273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,128,0,1,float16,fp8,0,0.5179839928944906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,128,0,1,float16,float16,0,0.45953599611918133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,128,0,1,fp8,fp8,0,0.5141280094782511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,128,0,1,float16,fp8,0,0.4587786595026652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,128,0,1,fp8,fp8,0,0.42819201946258545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,128,0,1,float16,float16,0,0.45875732103983563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,128,0,1,float16,fp8,0,0.4655253489812215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,128,0,1,fp8,fp8,0,0.43110934893290204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,128,0,1,float16,float16,0,0.47244266668955487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,128,0,1,float16,fp8,0,0.4727360010147095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,128,0,1,float16,float16,0,0.2834293246269226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,128,0,1,fp8,fp8,0,0.4480319817860921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,128,0,1,float16,fp8,0,0.2775146762530009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,128,0,1,fp8,fp8,0,0.2722240090370178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,128,0,1,float16,float16,0,0.24580800533294678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,128,0,1,float16,fp8,0,0.24514667193094888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,128,0,1,fp8,fp8,0,0.22925333182017008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,128,0,1,float16,float16,0,0.24637333552042642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,128,0,1,float16,fp8,0,0.24765332539876303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,128,0,1,float16,float16,0,0.25040000677108765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,128,0,1,fp8,fp8,0,0.22985066970189413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,128,0,1,float16,fp8,0,0.25042132536570233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,128,0,1,fp8,fp8,0,0.24052266279856363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,128,0,1,float16,float16,0,0.1556373337904612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,128,0,1,fp8,fp8,0,0.15010666847229004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,128,0,1,float16,fp8,0,0.1532693306605021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,128,0,1,float16,float16,0,0.13154133160909018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,128,0,1,float16,fp8,0,0.13107732931772867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,128,0,1,fp8,fp8,0,0.12446932991345723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,128,0,1,float16,float16,0,0.13331199685732523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,128,0,1,float16,fp8,0,0.13386133313179016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,128,0,1,fp8,fp8,0,0.12533866365750632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,128,0,1,float16,float16,0,0.13693867127100626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,128,0,1,float16,fp8,0,0.1369706690311432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,128,0,1,fp8,fp8,0,0.1330400009950002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,128,0,1,float16,fp8,0,0.0884320040543874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,128,0,1,float16,float16,0,0.09114133318265279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,128,0,1,fp8,fp8,0,0.08941333492596944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,128,0,1,float16,float16,0,0.07843199868996938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,128,0,1,float16,fp8,0,0.07869333525498708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,128,0,1,fp8,fp8,0,0.07203733424345653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,128,0,1,float16,float16,0,0.07707733412583669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,128,0,1,float16,fp8,0,0.07797333101431529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,128,0,1,fp8,fp8,0,0.07097599903742473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,128,0,1,float16,float16,0,0.07870399951934814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,128,0,1,float16,fp8,0,0.07840533554553986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,128,0,1,fp8,fp8,0,0.072543998559316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,128,0,1,float16,float16,0,0.05418133238951365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,128,0,1,float16,fp8,0,0.05407466491063436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,128,0,1,fp8,fp8,0,0.05221333106358846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,128,0,1,float16,float16,0,0.05236266553401947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,128,0,1,fp8,fp8,0,0.04779199759165446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,128,0,1,float16,fp8,0,0.0521919975678126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,128,0,1,float16,float16,0,0.05259199937184652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,128,0,1,float16,fp8,0,0.053861334919929504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,128,0,1,float16,float16,0,0.05208533505598704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,128,0,1,fp8,fp8,0,0.04806933303674062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,128,0,1,float16,fp8,0,0.05251200000445048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,128,0,1,fp8,fp8,0,0.050069332122802734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,128,0,1,float16,float16,0,3.0888001124064126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,128,0,1,float16,fp8,0,3.121391932169596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,128,0,1,fp8,fp8,0,3.091109275817871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,128,0,1,float16,float16,0,3.0992959340413413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,128,0,1,float16,fp8,0,3.1475626627604165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,128,0,1,fp8,fp8,0,3.1495253245035806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,128,0,1,float16,float16,0,3.1464640299479165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,128,0,1,float16,fp8,0,3.187642733256022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,128,0,1,float16,float16,0,1.8370505968729656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,128,0,1,float16,fp8,0,1.8007574081420898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,128,0,1,fp8,fp8,0,1.7849013010660808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,128,0,1,fp8,fp8,0,3.1720053354899087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,128,0,1,float16,float16,0,1.487610658009847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,128,0,1,float16,fp8,0,1.5266772905985515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,128,0,1,fp8,fp8,0,1.5316267013549805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,128,0,1,float16,float16,0,1.5024266242980957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,128,0,1,float16,fp8,0,1.5469279289245605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,128,0,1,fp8,fp8,0,1.5616960525512695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,128,0,1,float16,float16,0,1.5417226155598958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,128,0,1,float16,float16,0,0.8928960164388021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,128,0,1,float16,fp8,0,0.8881706396738688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,128,0,1,float16,fp8,0,1.5497600237528484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,128,0,1,fp8,fp8,0,1.559216022491455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,128,0,1,fp8,fp8,0,0.885317325592041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,128,0,1,float16,float16,0,0.7558826605478922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,128,0,1,float16,fp8,0,0.7638453642527262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,128,0,1,fp8,fp8,0,0.7173973719278971
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,128,0,1,float16,fp8,0,0.7640639940897623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,128,0,1,fp8,fp8,0,0.7221226692199707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,128,0,1,float16,float16,0,0.7598293622334799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,128,0,1,float16,float16,0,0.7796053091684977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,128,0,1,fp8,fp8,0,0.7555359999338785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,128,0,1,float16,fp8,0,0.7794346809387207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,128,0,1,fp8,fp8,0,0.4562346537907918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,128,0,1,float16,float16,0,0.46064531803131104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,128,0,1,float16,fp8,0,0.4537493387858073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,128,0,1,fp8,fp8,0,0.36738133430480957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,128,0,1,float16,fp8,0,0.3924959897994995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,128,0,1,float16,float16,0,0.39112532138824463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,128,0,1,float16,float16,0,0.39135468006134033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,128,0,1,float16,fp8,0,0.393120010693868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,128,0,1,fp8,fp8,0,0.37112001578013104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,128,0,1,float16,float16,0,0.40299733479817706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,128,0,1,fp8,fp8,0,0.3887733221054077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,128,0,1,float16,fp8,0,0.4023413260777791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,128,0,1,float16,fp8,0,0.2385653257369995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,128,0,1,float16,float16,0,0.2464746634165446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,128,0,1,fp8,fp8,0,0.2404586672782898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,128,0,1,fp8,fp8,0,0.1946186621983846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,128,0,1,float16,float16,0,0.20879467328389487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,128,0,1,float16,fp8,0,0.20919466018676758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,128,0,1,float16,float16,0,0.207914670308431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,128,0,1,fp8,fp8,0,0.19798400004704794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,128,0,1,float16,fp8,0,0.20839999119440714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,128,0,1,float16,float16,0,0.21320533752441406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,128,0,1,float16,fp8,0,0.21343467632929483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,128,0,1,fp8,fp8,0,0.20486400524775186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,128,0,1,float16,float16,0,0.13360533118247986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,128,0,1,float16,fp8,0,0.130730668703715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,128,0,1,fp8,fp8,0,0.1311360001564026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,128,0,1,fp8,fp8,0,0.10608533024787903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,128,0,1,float16,float16,0,0.11082667112350464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,128,0,1,float16,fp8,0,0.11078932881355286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,128,0,1,float16,fp8,0,0.11160000165303548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,128,0,1,float16,float16,0,0.11155733466148376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,128,0,1,fp8,fp8,0,0.10756267110506694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,128,0,1,fp8,fp8,0,0.11314133803049724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,128,0,1,float16,fp8,0,0.11477866768836975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,128,0,1,float16,float16,0,0.11598400274912517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,128,0,1,float16,float16,0,0.07707733412583669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,128,0,1,float16,fp8,0,0.07660800218582153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,128,0,1,float16,float16,0,0.06618133187294006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,128,0,1,fp8,fp8,0,0.07971199850241344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,128,0,1,float16,fp8,0,0.06647466619809468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,128,0,1,fp8,fp8,0,0.06169599791367849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,128,0,1,float16,float16,0,0.06526933113733928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,128,0,1,float16,fp8,0,0.06539200246334076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,128,0,1,fp8,fp8,0,0.061893333991368614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,128,0,1,float16,float16,0,0.06625066697597504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,128,0,1,fp8,fp8,0,0.06141866743564606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,128,0,1,float16,fp8,0,0.06634666522343953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,128,0,1,float16,float16,0,0.04589866598447164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,128,0,1,float16,fp8,0,0.045909335215886436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,128,0,1,float16,float16,0,0.04340266684691111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,128,0,1,fp8,fp8,0,0.04359999795754751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,128,0,1,float16,fp8,0,0.043791999419530235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,128,0,1,fp8,fp8,0,0.04148799926042557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,128,0,1,float16,float16,0,0.04354133208592733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,128,0,1,fp8,fp8,0,0.03984000037113825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,128,0,1,float16,fp8,0,0.044218664367993675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,128,0,1,float16,float16,0,0.044405331214269005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,128,0,1,float16,fp8,0,0.04456000030040741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,128,0,1,float16,float16,0,0.03289066751797994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,128,0,1,fp8,fp8,0,0.041834667325019836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,128,0,1,float16,fp8,0,0.03344533344109853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,128,0,1,fp8,fp8,0,0.03166399896144867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,128,0,1,float16,float16,0,0.03177600105603536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,128,0,1,float16,fp8,0,0.03160533308982849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,128,0,1,fp8,fp8,0,0.029391999046007793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,128,0,1,float16,float16,0,0.030997333427270252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,128,0,1,float16,fp8,0,0.03177600105603536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,128,0,1,fp8,fp8,0,0.029978667696317036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,128,0,1,float16,float16,0,0.03145600110292435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,128,0,1,float16,fp8,0,0.031258667508761086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,128,0,1,fp8,fp8,0,0.029653333127498627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,128,0,1,float16,float16,0,1.3471253712972004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,128,0,1,float16,fp8,0,1.376197338104248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,128,0,1,fp8,fp8,0,1.3888479868570964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,128,0,1,float16,float16,0,1.3633813858032227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,128,0,1,float16,fp8,0,1.3747785886128743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,128,0,1,fp8,fp8,0,1.4062132835388184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,128,0,1,float16,float16,0,1.3936479886372883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,128,0,1,float16,fp8,0,1.4348479906717937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,128,0,1,float16,fp8,0,0.8026933670043945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,128,0,1,float16,float16,0,0.8192853132883707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,128,0,1,fp8,fp8,0,0.831653356552124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,128,0,1,fp8,fp8,0,1.4339680671691895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,128,0,1,float16,float16,0,0.6817706425984701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,128,0,1,float16,fp8,0,0.6847093105316162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,128,0,1,fp8,fp8,0,0.6573866605758667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,128,0,1,float16,float16,0,0.685920000076294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,128,0,1,fp8,fp8,0,0.6577333211898804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,128,0,1,float16,fp8,0,0.6896853446960449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,128,0,1,float16,float16,0,0.7109973430633545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,128,0,1,float16,fp8,0,0.7049173514048258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,128,0,1,float16,fp8,0,0.41494933764139813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,128,0,1,float16,float16,0,0.42094401518503827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,128,0,1,fp8,fp8,0,0.6876053015391032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,128,0,1,fp8,fp8,0,0.42390398184458417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,128,0,1,float16,float16,0,0.3535573482513428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,128,0,1,fp8,fp8,0,0.33352001508076984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,128,0,1,float16,fp8,0,0.35339732964833576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,128,0,1,float16,float16,0,0.3561866680781047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,128,0,1,fp8,fp8,0,0.3370933135350545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,128,0,1,float16,fp8,0,0.35381333033243817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,128,0,1,float16,float16,0,0.3678453365961711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,128,0,1,float16,fp8,0,0.3643893400828044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,128,0,1,fp8,fp8,0,0.35239466031392414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,128,0,1,float16,float16,0,0.22639999787012735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,128,0,1,float16,fp8,0,0.22073600689570108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,128,0,1,fp8,fp8,0,0.22390933831532797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,128,0,1,float16,float16,0,0.18870933850606283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,128,0,1,fp8,fp8,0,0.17995200554529825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,128,0,1,float16,fp8,0,0.18809600671132407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,128,0,1,float16,float16,0,0.18896534045537314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,128,0,1,float16,fp8,0,0.19046932458877563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,128,0,1,fp8,fp8,0,0.17940799395243326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,128,0,1,float16,float16,0,0.19426133235295615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,128,0,1,float16,fp8,0,0.1938826640446981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,128,0,1,fp8,fp8,0,0.18755733966827393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,128,0,1,float16,float16,0,0.12500799695650736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,128,0,1,float16,fp8,0,0.12210133671760559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,128,0,1,float16,float16,0,0.10340799887975057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,128,0,1,fp8,fp8,0,0.12196800112724304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,128,0,1,fp8,fp8,0,0.09831466277440389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,128,0,1,float16,fp8,0,0.10362133383750916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,128,0,1,float16,float16,0,0.10339732964833577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,128,0,1,float16,fp8,0,0.10521599650382996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,128,0,1,fp8,fp8,0,0.09773866335550944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,128,0,1,float16,float16,0,0.1074133316675822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,128,0,1,float16,float16,0,0.07087466617425282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,128,0,1,float16,fp8,0,0.10714133580525716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,128,0,1,fp8,fp8,0,0.10355200370152791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,128,0,1,float16,float16,0,0.06044266621271769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,128,0,1,float16,fp8,0,0.06846933563550313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,128,0,1,fp8,fp8,0,0.07409066458543141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,128,0,1,float16,fp8,0,0.05975466469923655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,128,0,1,fp8,fp8,0,0.05434666574001312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,128,0,1,float16,float16,0,0.058746665716171265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,128,0,1,float16,fp8,0,0.05826666454474131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,128,0,1,fp8,fp8,0,0.05485333502292633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,128,0,1,float16,float16,0,0.060421332716941833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,128,0,1,float16,fp8,0,0.06002133091290792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,128,0,1,fp8,fp8,0,0.055013333757718406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,128,0,1,float16,float16,0,0.04385066529115041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,128,0,1,float16,fp8,0,0.04170133173465729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,128,0,1,fp8,fp8,0,0.04165866722663244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,128,0,1,float16,float16,0,0.040576001008351646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,128,0,1,float16,fp8,0,0.04155733436346054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,128,0,1,fp8,fp8,0,0.037818667789300285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,128,0,1,float16,float16,0,0.04182933270931244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,128,0,1,float16,fp8,0,0.04181866844495138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,128,0,1,fp8,fp8,0,0.03743999948104223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,128,0,1,float16,float16,0,0.03947199881076813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,128,0,1,float16,fp8,0,0.039936001102129616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,128,0,1,fp8,fp8,0,0.038058665891488395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,128,0,1,float16,float16,0,0.029274667302767437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,128,0,1,float16,fp8,0,0.029530666768550873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,128,0,1,fp8,fp8,0,0.02815466622511546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,128,0,1,float16,float16,0,0.02934933453798294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,128,0,1,fp8,fp8,0,0.027503999571005504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,128,0,1,float16,fp8,0,0.029296000798543293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,128,0,1,float16,float16,0,0.02924266705910365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,128,0,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,128,0,1,fp8,fp8,0,0.027701333165168762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,128,0,1,float16,float16,0,0.027456000447273254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,128,0,1,float16,fp8,0,0.027450665831565857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,128,0,1,fp8,fp8,0,0.02775466690460841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,128,0,1,float16,float16,0,0.025242666403452556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,128,0,1,float16,fp8,0,0.025290665527184803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,128,0,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,128,0,1,float16,float16,0,0.02346133440732956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,128,0,1,float16,fp8,0,0.025306666890780132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,128,0,1,fp8,fp8,0,0.023071999351183575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,128,0,1,float16,float16,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,128,0,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,128,0,1,float16,float16,0,0.024400000770886738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,128,0,1,float16,fp8,0,0.02548266698916753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,128,0,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,128,0,1,fp8,fp8,0,0.023247999449570973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,128,0,1,float16,fp8,0,0.7312853336334229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,128,0,1,fp8,fp8,0,0.7158239682515463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,128,0,1,float16,float16,0,0.7285652955373129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,128,0,1,float16,float16,0,0.7336320082346598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,128,0,1,float16,fp8,0,0.7401653130849203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,128,0,1,fp8,fp8,0,0.7352320353190104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,128,0,1,float16,fp8,0,0.7507146994272867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,128,0,1,float16,float16,0,0.7499306996663412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,128,0,1,float16,float16,0,0.4431840181350708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,128,0,1,float16,fp8,0,0.43348264694213867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,128,0,1,fp8,fp8,0,0.44838933149973553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,128,0,1,fp8,fp8,0,0.739349365234375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,128,0,1,float16,float16,0,0.3717120091120402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,128,0,1,float16,fp8,0,0.37411733468373615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,128,0,1,fp8,fp8,0,0.3606666723887126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,128,0,1,float16,float16,0,0.37536001205444336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,128,0,1,float16,fp8,0,0.3767840067545573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,128,0,1,fp8,fp8,0,0.36580801010131836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,128,0,1,float16,float16,0,0.38710931936899823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,128,0,1,float16,fp8,0,0.3834826548894246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,128,0,1,float16,float16,0,0.2345973253250122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,128,0,1,float16,fp8,0,0.2281493345896403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,128,0,1,fp8,fp8,0,0.3795413176218669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,128,0,1,float16,float16,0,0.19868266582489014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,128,0,1,fp8,fp8,0,0.23509333531061807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,128,0,1,float16,fp8,0,0.19664533933003744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,128,0,1,fp8,fp8,0,0.19262933731079102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,128,0,1,float16,float16,0,0.19944000244140625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,128,0,1,float16,fp8,0,0.1986773411432902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,128,0,1,fp8,fp8,0,0.1916159987449646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,128,0,1,float16,float16,0,0.20436267058054605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,128,0,1,float16,fp8,0,0.20241065820058188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,128,0,1,float16,float16,0,0.12558399637540182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,128,0,1,fp8,fp8,0,0.1990399956703186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,128,0,1,float16,fp8,0,0.1239946683247884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,128,0,1,fp8,fp8,0,0.12809066971143088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,128,0,1,float16,float16,0,0.10756267110506694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,128,0,1,float16,fp8,0,0.10788800319035848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,128,0,1,fp8,fp8,0,0.1018986701965332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,128,0,1,float16,float16,0,0.10744532942771912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,128,0,1,float16,fp8,0,0.1076853374640147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,128,0,1,fp8,fp8,0,0.10361066460609436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,128,0,1,float16,fp8,0,0.10959466298421223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,128,0,1,float16,float16,0,0.11060800155003865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,128,0,1,fp8,fp8,0,0.11011200149854024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,128,0,1,float16,float16,0,0.07067733506361644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,128,0,1,float16,fp8,0,0.07054933408896129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,128,0,1,fp8,fp8,0,0.07687999804814656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,128,0,1,float16,float16,0,0.06217066446940104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,128,0,1,float16,fp8,0,0.061978667974472046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,128,0,1,fp8,fp8,0,0.05852266649405161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,128,0,1,float16,float16,0,0.062309334675470986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,128,0,1,float16,fp8,0,0.06215466558933258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,128,0,1,fp8,fp8,0,0.05816000203291575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,128,0,1,float16,float16,0,0.0637546678384145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,128,0,1,float16,fp8,0,0.062447999914487205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,128,0,1,fp8,fp8,0,0.0581226646900177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,128,0,1,float16,float16,0,0.04118400067090988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,128,0,1,float16,fp8,0,0.039690665900707245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,128,0,1,fp8,fp8,0,0.04009599983692169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,128,0,1,float16,float16,0,0.037530665596326195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,128,0,1,float16,fp8,0,0.04142399877309799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,128,0,1,float16,float16,0,0.037445334096749626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,128,0,1,fp8,fp8,0,0.035802667339642845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,128,0,1,float16,fp8,0,0.03967999915281931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,128,0,1,fp8,fp8,0,0.036277333895365395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,128,0,1,float16,float16,0,0.03991466760635376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,128,0,1,float16,fp8,0,0.039706667264302574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,128,0,1,fp8,fp8,0,0.03732266773780187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,128,0,1,float16,float16,0,0.02972800036271413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,128,0,1,float16,fp8,0,0.03124266614516576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,128,0,1,fp8,fp8,0,0.029669334491093952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,128,0,1,float16,float16,0,0.029338667790095013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,128,0,1,float16,fp8,0,0.02900800108909607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,128,0,1,fp8,fp8,0,0.027749332288901012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,128,0,1,float16,float16,0,0.029418667157491047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,128,0,1,float16,fp8,0,0.029504001140594482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,128,0,1,fp8,fp8,0,0.027637332677841187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,128,0,1,float16,float16,0,0.02808533360560735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,128,0,1,float16,fp8,0,0.029845332105954487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,128,0,1,fp8,fp8,0,0.028229333460330963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,128,0,1,float16,float16,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,128,0,1,float16,fp8,0,0.0225600004196167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,128,0,1,fp8,fp8,0,0.023034666975339253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,128,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,128,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,128,0,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,128,0,1,float16,float16,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,128,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,128,0,1,fp8,fp8,0,0.021583999196688335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,128,0,1,float16,float16,0,0.020981334149837494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,128,0,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,128,0,1,fp8,fp8,0,0.020549333343903225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,128,0,1,float16,float16,0,0.01937599976857503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,128,0,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,128,0,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,128,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,128,0,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,128,0,1,float16,float16,0,0.01869333287080129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,128,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,128,0,1,fp8,fp8,0,0.0179626668492953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,128,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,128,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,128,0,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,128,0,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,128,0,1,float16,float16,0,0.4731359879175822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,128,0,1,fp8,fp8,0,0.4582666556040446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,128,0,1,float16,fp8,0,0.47094400723775226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,128,0,1,float16,float16,0,0.47469866275787354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,128,0,1,float16,fp8,0,0.47464533646901447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,128,0,1,fp8,fp8,0,0.4647680123647054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,128,0,1,float16,float16,0,0.4817119836807251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,128,0,1,float16,fp8,0,0.4816746711730957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,128,0,1,float16,float16,0,0.28069865703582764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,128,0,1,float16,fp8,0,0.27645333607991535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,128,0,1,fp8,fp8,0,0.4761226574579875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,128,0,1,fp8,fp8,0,0.28194665908813477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,128,0,1,float16,float16,0,0.24498132864634195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,128,0,1,float16,fp8,0,0.24763200680414835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,128,0,1,fp8,fp8,0,0.23887999852498373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,128,0,1,float16,float16,0,0.2466826637585958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,128,0,1,float16,fp8,0,0.24676799774169922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,128,0,1,fp8,fp8,0,0.23904534180959067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,128,0,1,float16,fp8,0,0.2501866618792216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,128,0,1,float16,float16,0,0.2505066593488057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,128,0,1,fp8,fp8,0,0.2471733291943868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,128,0,1,float16,float16,0,0.15052800377209982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,128,0,1,float16,fp8,0,0.14612799882888794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,128,0,1,fp8,fp8,0,0.15291733543078104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,128,0,1,float16,float16,0,0.13251733779907227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,128,0,1,float16,fp8,0,0.13270933429400125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,128,0,1,fp8,fp8,0,0.12706133723258972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,128,0,1,float16,float16,0,0.1318719983100891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,128,0,1,float16,fp8,0,0.13251733779907227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,128,0,1,fp8,fp8,0,0.12809600432713827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,128,0,1,float16,float16,0,0.1348479986190796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,128,0,1,float16,fp8,0,0.1344373325506846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,128,0,1,fp8,fp8,0,0.13343466321627298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,128,0,1,float16,float16,0,0.08281066517035167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,128,0,1,float16,fp8,0,0.08307733138402303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,128,0,1,fp8,fp8,0,0.08845333258310954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,128,0,1,float16,float16,0,0.07382399837176006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,128,0,1,float16,fp8,0,0.07261333366235097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,128,0,1,fp8,fp8,0,0.07035199801127116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,128,0,1,float16,float16,0,0.07496533294518788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,128,0,1,float16,fp8,0,0.07306666672229767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,128,0,1,fp8,fp8,0,0.0703413337469101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,128,0,1,float16,float16,0,0.07452799876530965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,128,0,1,float16,fp8,0,0.07433600227038066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,128,0,1,float16,float16,0,0.04804266492525736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,128,0,1,fp8,fp8,0,0.07122133175532024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,128,0,1,float16,fp8,0,0.047872001926104225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,128,0,1,fp8,fp8,0,0.04810666541258494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,128,0,1,float16,float16,0,0.04550399879614512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,128,0,1,float16,fp8,0,0.04571733375390371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,128,0,1,fp8,fp8,0,0.04364266494909922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,128,0,1,float16,float16,0,0.04695466657479604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,128,0,1,float16,fp8,0,0.0459199994802475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,128,0,1,fp8,fp8,0,0.04353600243727366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,128,0,1,float16,float16,0,0.04560000201066335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,128,0,1,float16,fp8,0,0.045754666129748024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,128,0,1,fp8,fp8,0,0.04342400034268697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,128,0,1,float16,float16,0,0.03154666721820831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,128,0,1,float16,fp8,0,0.031445334355036415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,128,0,1,fp8,fp8,0,0.03143466760714849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,128,0,1,float16,float16,0,0.031354665756225586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,128,0,1,float16,fp8,0,0.03156800071398417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,128,0,1,fp8,fp8,0,0.030405332644780476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,128,0,1,float16,fp8,0,0.03155199935038885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,128,0,1,float16,float16,0,0.03140799949566523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,128,0,1,fp8,fp8,0,0.030048000315825146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,128,0,1,float16,float16,0,0.03182933231194814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,128,0,1,float16,fp8,0,0.031712000568707786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,128,0,1,fp8,fp8,0,0.029418667157491047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,128,0,1,float16,float16,0,0.023423999547958374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,128,0,1,float16,fp8,0,0.024559999505678814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,128,0,1,fp8,fp8,0,0.023168000082174938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,128,0,1,float16,float16,0,0.02314666658639908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,128,0,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,128,0,1,fp8,fp8,0,0.02327466756105423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,128,0,1,float16,float16,0,0.02319466571013133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,128,0,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,128,0,1,fp8,fp8,0,0.02325333406527837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,128,0,1,float16,float16,0,0.023386667172114056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,128,0,1,float16,fp8,0,0.023423999547958374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,128,0,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,128,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,128,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,128,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,128,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,128,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,128,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,128,0,1,float16,fp8,0,0.017621333400408428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,128,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,128,0,1,float16,float16,0,0.019013332823912304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,128,0,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,128,0,1,float16,float16,0,0.01802666609485944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,128,0,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,128,0,1,float16,float16,0,0.017375999440749485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,128,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,128,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,128,0,1,float16,fp8,0,0.018320000420014065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,128,0,1,float16,float16,0,0.017632000148296356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,128,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,128,0,1,float16,float16,0,0.3492639859517415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,128,0,1,float16,fp8,0,0.34920533498128253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,128,0,1,float16,float16,0,0.3455093304316203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,128,0,1,fp8,fp8,0,0.33717866738637287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,128,0,1,float16,fp8,0,0.3491520086924235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,128,0,1,fp8,fp8,0,0.3373279968897502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,128,0,1,float16,float16,0,0.3502773443857829
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,128,0,1,float16,fp8,0,0.3532960017522176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,128,0,1,float16,float16,0,0.20112532377243042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,128,0,1,fp8,fp8,0,0.3444053332010905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,128,0,1,float16,fp8,0,0.1970133384068807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,128,0,1,fp8,fp8,0,0.19992534319559732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,128,0,1,float16,float16,0,0.18378132581710815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,128,0,1,float16,fp8,0,0.18361065785090128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,128,0,1,fp8,fp8,0,0.17524800697962442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,128,0,1,float16,float16,0,0.18224000930786133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,128,0,1,float16,fp8,0,0.18281600872675577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,128,0,1,fp8,fp8,0,0.17700799306233725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,128,0,1,float16,float16,0,0.18481600284576416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,128,0,1,float16,fp8,0,0.18488534291585287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,128,0,1,fp8,fp8,0,0.18120533227920532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,128,0,1,float16,float16,0,0.1085599958896637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,128,0,1,fp8,fp8,0,0.11077333490053813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,128,0,1,float16,fp8,0,0.10912533601125081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,128,0,1,float16,float16,0,0.09947733084360759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,128,0,1,float16,fp8,0,0.0995199978351593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,128,0,1,fp8,fp8,0,0.09330667058626811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,128,0,1,float16,float16,0,0.09922132889429729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,128,0,1,float16,fp8,0,0.0992746651172638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,128,0,1,fp8,fp8,0,0.09327999750773112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,128,0,1,float16,float16,0,0.09931199749310811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,128,0,1,float16,fp8,0,0.09992000460624695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,128,0,1,fp8,fp8,0,0.09551999966303508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,128,0,1,float16,float16,0,0.06062399844328562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,128,0,1,float16,fp8,0,0.05870933334032694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,128,0,1,float16,float16,0,0.05804799993832906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,128,0,1,fp8,fp8,0,0.05834133426348368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,128,0,1,float16,fp8,0,0.05690133571624756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,128,0,1,fp8,fp8,0,0.05644266804059347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,128,0,1,float16,float16,0,0.05714133381843567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,128,0,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,128,0,1,fp8,fp8,0,0.056277334690093994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,128,0,1,float16,float16,0,0.058378666639328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,128,0,1,float16,fp8,0,0.05856533348560333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,128,0,1,fp8,fp8,0,0.054661333560943604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,128,0,1,float16,float16,0,0.039520000418027244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,128,0,1,float16,fp8,0,0.03984000037113825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,128,0,1,fp8,fp8,0,0.037658666570981346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,128,0,1,float16,float16,0,0.03761066744724909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,128,0,1,float16,fp8,0,0.03775466730197271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,128,0,1,fp8,fp8,0,0.036943999429543815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,128,0,1,float16,float16,0,0.037802666425704956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,128,0,1,float16,fp8,0,0.03777066618204117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,128,0,1,fp8,fp8,0,0.0371573343873024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,128,0,1,float16,float16,0,0.03763733307520548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,128,0,1,float16,fp8,0,0.03757333258787791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,128,0,1,float16,float16,0,0.025583999852339428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,128,0,1,fp8,fp8,0,0.03569599986076355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,128,0,1,float16,fp8,0,0.025616000096003216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,128,0,1,float16,float16,0,0.02502399931351344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,128,0,1,fp8,fp8,0,0.027263998985290527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,128,0,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,128,0,1,fp8,fp8,0,0.025050667424996693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,128,0,1,float16,float16,0,0.025477332373460133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,128,0,1,float16,fp8,0,0.025514667232831318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,128,0,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,128,0,1,float16,float16,0,0.025642665723959606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,128,0,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,128,0,1,fp8,fp8,0,0.02568000058333079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,128,0,1,float16,float16,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,128,0,1,float16,fp8,0,0.02143999934196472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,128,0,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,128,0,1,float16,float16,0,0.019152000546455383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,128,0,1,float16,fp8,0,0.019440000255902607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,128,0,1,fp8,fp8,0,0.02080533280968666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,128,0,1,float16,float16,0,0.01942933350801468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,128,0,1,float16,fp8,0,0.019786667078733444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,128,0,1,fp8,fp8,0,0.021701333423455555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,128,0,1,float16,float16,0,0.019440000255902607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,128,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,128,0,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,128,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,128,0,1,float16,float16,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,128,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,128,0,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,128,0,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,128,0,1,float16,fp8,0,0.01775466650724411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,128,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,128,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,128,0,1,float16,float16,0,0.015562667200962702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,128,0,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,128,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,128,0,1,fp8,fp8,0,0.01552533358335495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,128,0,1,float16,float16,0,0.015423999478419622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,128,0,1,float16,float16,0,0.28430400292078656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,128,0,1,float16,fp8,0,0.2873599926630656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,128,0,1,fp8,fp8,0,0.27613866329193115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,128,0,1,float16,float16,0,0.28618667523066205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,128,0,1,float16,fp8,0,0.2853920062383016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,128,0,1,fp8,fp8,0,0.2781226634979248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,128,0,1,float16,float16,0,0.2884426712989807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,128,0,1,float16,float16,0,0.1613866686820984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,128,0,1,float16,fp8,0,0.2879626750946045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,128,0,1,fp8,fp8,0,0.28381866216659546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,128,0,1,float16,fp8,0,0.16100266575813293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,128,0,1,fp8,fp8,0,0.1627786656220754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,128,0,1,float16,float16,0,0.15154133240381876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,128,0,1,float16,fp8,0,0.152346670627594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,128,0,1,fp8,fp8,0,0.14441066980361938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,128,0,1,float16,float16,0,0.15084266662597656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,128,0,1,float16,fp8,0,0.15077333648999533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,128,0,1,fp8,fp8,0,0.14430399735768637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,128,0,1,float16,float16,0,0.15178666512171426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,128,0,1,float16,fp8,0,0.15177067120869955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,128,0,1,float16,float16,0,0.08526933193206787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,128,0,1,fp8,fp8,0,0.1443946659564972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,128,0,1,float16,fp8,0,0.08469333251317342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,128,0,1,fp8,fp8,0,0.08348799745241801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,128,0,1,float16,float16,0,0.08290666838486989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,128,0,1,float16,fp8,0,0.08318399886290233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,128,0,1,fp8,fp8,0,0.08050133287906647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,128,0,1,float16,float16,0,0.08318933347860973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,128,0,1,float16,fp8,0,0.0843999981880188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,128,0,1,fp8,fp8,0,0.0811413327852885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,128,0,1,float16,float16,0,0.08385066191355388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,128,0,1,float16,fp8,0,0.0846613347530365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,128,0,1,float16,float16,0,0.0514933317899704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,128,0,1,fp8,fp8,0,0.08100266754627228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,128,0,1,float16,fp8,0,0.05147733290990194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,128,0,1,fp8,fp8,0,0.05111999809741974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,128,0,1,float16,float16,0,0.05008533100287119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,128,0,1,float16,fp8,0,0.04965866605440775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,128,0,1,fp8,fp8,0,0.049957334995269775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,128,0,1,float16,float16,0,0.0499839981396993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,128,0,1,float16,fp8,0,0.050101334849993386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,128,0,1,fp8,fp8,0,0.04786133269468943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,128,0,1,float16,float16,0,0.05002133548259735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,128,0,1,float16,fp8,0,0.05014933149019877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,128,0,1,float16,float16,0,0.03331733246644338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,128,0,1,fp8,fp8,0,0.04836800197760264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,128,0,1,float16,fp8,0,0.03363733241955439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,128,0,1,fp8,fp8,0,0.033861334125200905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,128,0,1,float16,float16,0,0.0335359995563825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,128,0,1,float16,fp8,0,0.03363733241955439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,128,0,1,fp8,fp8,0,0.03329066683848699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,128,0,1,float16,float16,0,0.03335466732581457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,128,0,1,float16,fp8,0,0.03158933420976003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,128,0,1,fp8,fp8,0,0.03151999910672506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,128,0,1,float16,float16,0,0.031658666829268135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,128,0,1,float16,fp8,0,0.0315733328461647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,128,0,1,fp8,fp8,0,0.03180799881617228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,128,0,1,float16,float16,0,0.02346133440732956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,128,0,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,128,0,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,128,0,1,float16,float16,0,0.02292799949645996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,128,0,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,128,0,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,128,0,1,float16,float16,0,0.023269332945346832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,128,0,1,float16,fp8,0,0.023226665953795116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,128,0,1,fp8,fp8,0,0.02309333284695943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,128,0,1,float16,float16,0,0.023200000325838726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,128,0,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,128,0,1,fp8,fp8,0,0.023365333676338196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,128,0,1,float16,float16,0,0.019333332777023315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,128,0,1,float16,fp8,0,0.020762667059898376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,128,0,1,fp8,fp8,0,0.020810666183630627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,128,0,1,float16,float16,0,0.0195573332409064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,128,0,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,128,0,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,128,0,1,float16,float16,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,128,0,1,float16,fp8,0,0.019546666493018467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,128,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,128,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,128,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,128,0,1,fp8,fp8,0,0.019610666980346043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,128,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,128,0,1,float16,float16,0,0.0162773331006368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,128,0,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,128,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,128,0,1,float16,float16,0,0.017711999515692394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,128,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,128,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,128,0,1,float16,fp8,0,0.0174346665541331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,128,0,1,float16,float16,0,0.01551466683546702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,128,0,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,128,0,1,float16,float16,0,0.01579733317097028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,128,0,1,float16,fp8,0,0.016480000068744022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,128,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,128,0,1,float16,float16,0,0.23924267292022705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,128,0,1,float16,fp8,0,0.23949867486953735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,128,0,1,fp8,fp8,0,0.22673600912094116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,128,0,1,float16,float16,0,0.2398186723391215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,128,0,1,float16,fp8,0,0.24013866980870566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,128,0,1,fp8,fp8,0,0.22662399212519327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,128,0,1,float16,float16,0,0.24036800861358643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,128,0,1,float16,fp8,0,0.23918400208155313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,128,0,1,fp8,fp8,0,0.22633600234985352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,0,0.12617599964141846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,0,0.12557866175969443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,128,0,1,fp8,fp8,0,0.12185066938400269
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,128,0,1,float16,float16,0,0.12544533610343933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,128,0,1,float16,fp8,0,0.1260640025138855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,128,0,1,fp8,fp8,0,0.11964799960454305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,128,0,1,float16,float16,0,0.12436800201733907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,128,0,1,float16,fp8,0,0.1263146698474884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,128,0,1,fp8,fp8,0,0.11981333295504253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,128,0,1,float16,float16,0,0.12563199798266092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,128,0,1,float16,fp8,0,0.12557866175969443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,128,0,1,fp8,fp8,0,0.11998933553695679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,0,0.07055466870466869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,0,0.0706826647122701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,128,0,1,fp8,fp8,0,0.06836266815662384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,128,0,1,float16,float16,0,0.07124799986680348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,128,0,1,float16,fp8,0,0.07060266534487407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,128,0,1,fp8,fp8,0,0.06846933563550313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,128,0,1,float16,float16,0,0.07249066730340321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,128,0,1,float16,fp8,0,0.07028266787528992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,128,0,1,fp8,fp8,0,0.06824000179767609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,128,0,1,float16,float16,0,0.07050666709740956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,128,0,1,float16,fp8,0,0.07027733325958252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,128,0,1,fp8,fp8,0,0.06870933373769124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,0,0.04359999795754751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,0,0.043920000394185386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,128,0,1,fp8,fp8,0,0.042352000872294106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,128,0,1,float16,float16,0,0.04381333291530609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,128,0,1,float16,fp8,0,0.044362664222717285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,128,0,1,fp8,fp8,0,0.04170133173465729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,128,0,1,float16,float16,0,0.04380266865094503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,128,0,1,float16,fp8,0,0.04376000165939331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,128,0,1,fp8,fp8,0,0.04197866717974345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,128,0,1,float16,float16,0,0.043738668163617454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,128,0,1,float16,fp8,0,0.04378666480382284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,128,0,1,fp8,fp8,0,0.04205866654713949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,0,0.029504001140594482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,0,0.029264000554879505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,128,0,1,fp8,fp8,0,0.029743999242782593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,128,0,1,float16,fp8,0,0.029605334003766377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,128,0,1,float16,float16,0,0.029466666281223297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,128,0,1,fp8,fp8,0,0.029322666426499683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,128,0,1,float16,float16,0,0.029487999776999157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,128,0,1,float16,fp8,0,0.0313226655125618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,128,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,128,0,1,float16,float16,0,0.029477333029111225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,128,0,1,float16,fp8,0,0.029872000217437744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,128,0,1,fp8,fp8,0,0.02938666691382726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,0,0.023130667706330616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,0,0.023370665808518726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,128,0,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,128,0,1,float16,float16,0,0.023189333577950794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,128,0,1,float16,fp8,0,0.02347733328739802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,128,0,1,fp8,fp8,0,0.02316266546646754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,128,0,1,float16,float16,0,0.02515733242034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,128,0,1,float16,fp8,0,0.02550400048494339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,128,0,1,fp8,fp8,0,0.02317333221435547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,128,0,1,float16,float16,0,0.023978665471076965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,128,0,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,128,0,1,fp8,fp8,0,0.023061332603295643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,0,0.019893333315849304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,128,0,1,fp8,fp8,0,0.01956266661485036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,128,0,1,float16,float16,0,0.019061333189407986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,128,0,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,128,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,128,0,1,float16,float16,0,0.020703999946514767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,128,0,1,float16,fp8,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,128,0,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,128,0,1,float16,float16,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,128,0,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,128,0,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,128,0,1,float16,fp8,0,0.017711999515692394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,128,0,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,128,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,128,0,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,128,0,1,float16,float16,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,128,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,128,0,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,128,0,1,fp8,fp8,0,15.728720347086588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,128,0,1,float16,fp8,0,25.038917541503906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,128,0,1,float16,float16,0,28.99822998046875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,128,0,1,float16,float16,0,29.409685770670574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,128,0,1,fp8,fp8,0,15.575008392333984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,128,0,1,float16,fp8,0,27.33538055419922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,128,0,1,float16,float16,0,28.352582295735676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,128,0,1,fp8,fp8,0,16.30511474609375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,128,0,1,float16,float16,0,14.225173950195312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,128,0,1,float16,fp8,0,29.104036966959637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,128,0,1,fp8,fp8,0,8.543530782063803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,128,0,1,float16,fp8,0,14.596858978271484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,128,0,1,float16,float16,0,13.322884877522787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,128,0,1,fp8,fp8,0,7.980080286661784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,128,0,1,float16,fp8,0,14.014943440755209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,128,0,1,fp8,fp8,0,8.138943990071615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,128,0,1,float16,float16,0,14.012181599934896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,128,0,1,float16,fp8,0,13.512235005696615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,128,0,1,float16,float16,0,14.516234079996744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,128,0,1,float16,float16,0,6.9649918874104815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,128,0,1,fp8,fp8,0,8.756464004516602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,128,0,1,float16,fp8,0,12.878431955973307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,128,0,1,fp8,fp8,0,4.560784022013347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,128,0,1,float16,fp8,0,7.310496012369792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,128,0,1,float16,float16,0,6.7461598714192705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,128,0,1,fp8,fp8,0,3.929775873819987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,128,0,1,float16,fp8,0,6.622479756673177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,128,0,1,float16,float16,0,7.109466552734375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,128,0,1,fp8,fp8,0,4.296773274739583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,128,0,1,float16,fp8,0,6.96285883585612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,128,0,1,float16,float16,0,6.936794916788737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,128,0,1,float16,float16,0,3.077808062235514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,128,0,1,fp8,fp8,0,3.9361279805501304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,128,0,1,float16,fp8,0,2.5978506406148276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,128,0,1,float16,fp8,0,7.146410624186198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,128,0,1,fp8,fp8,0,2.2752159436543784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,128,0,1,float16,float16,0,2.9812746047973633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,128,0,1,fp8,fp8,0,2.041114648183187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,128,0,1,float16,fp8,0,2.889018694559733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,128,0,1,float16,float16,0,3.3562132517496743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,128,0,1,float16,fp8,0,2.8453334172566733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,128,0,1,fp8,fp8,0,2.0304373105367026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,128,0,1,float16,float16,0,2.967146555582682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,128,0,1,fp8,fp8,0,2.089893341064453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,128,0,1,float16,fp8,0,3.597503980000814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,128,0,1,fp8,fp8,0,10.122170766194662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,128,0,1,float16,fp8,0,14.761221567789713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,128,0,1,float16,float16,0,16.23866144816081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,128,0,1,float16,float16,0,16.100181579589844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,128,0,1,fp8,fp8,0,9.29533322652181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,128,0,1,float16,fp8,0,16.000837961832683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,128,0,1,float16,float16,0,16.385045369466145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,128,0,1,fp8,fp8,0,9.338336308797201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,128,0,1,float16,fp8,0,16.760597229003906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,128,0,1,float16,float16,0,8.705888112386068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,128,0,1,fp8,fp8,0,5.06492805480957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,128,0,1,float16,fp8,0,8.725226720174154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,128,0,1,float16,float16,0,8.437349319458008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,128,0,1,float16,fp8,0,8.006538391113281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,128,0,1,fp8,fp8,0,4.491130510965983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,128,0,1,fp8,fp8,0,5.005765279134114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,128,0,1,float16,float16,0,8.411520004272461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,128,0,1,float16,fp8,0,8.565338770548502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,128,0,1,float16,float16,0,7.8231252034505205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,128,0,1,fp8,fp8,0,5.388133366902669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,128,0,1,float16,float16,0,4.357856114705403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,128,0,1,float16,fp8,0,7.965706507364909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,128,0,1,float16,fp8,0,4.022794723510742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,128,0,1,fp8,fp8,0,2.653792063395182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,128,0,1,fp8,fp8,0,2.218810717264811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,128,0,1,float16,float16,0,4.09547742207845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,128,0,1,float16,fp8,0,3.860960006713867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,128,0,1,float16,float16,0,4.313119888305664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,128,0,1,float16,fp8,0,3.3264214197794595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,128,0,1,fp8,fp8,0,2.4354027112325034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,128,0,1,float16,float16,0,3.921717325846354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,128,0,1,float16,float16,0,1.489957332611084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,128,0,1,fp8,fp8,0,2.254762649536133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,128,0,1,float16,fp8,0,4.381648063659668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,128,0,1,float16,fp8,0,1.5811573664347331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,128,0,1,fp8,fp8,0,1.3004053433736165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,128,0,1,float16,float16,0,1.5317920049031575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,128,0,1,fp8,fp8,0,1.2914400100708008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,128,0,1,float16,float16,0,1.4373067220052083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,128,0,1,float16,fp8,0,1.4357760747273762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,128,0,1,float16,fp8,0,1.5272000630696614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,128,0,1,fp8,fp8,0,1.2428853511810303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,128,0,1,float16,float16,0,1.4193013509114583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,128,0,1,float16,fp8,0,1.7543892860412598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,128,0,1,fp8,fp8,0,1.25381867090861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,128,0,1,fp8,fp8,0,7.244698842366536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,128,0,1,float16,float16,0,11.322896321614584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,128,0,1,float16,float16,0,10.123968124389648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,128,0,1,float16,fp8,0,11.997845967610678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,128,0,1,fp8,fp8,0,6.6136322021484375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,128,0,1,float16,fp8,0,11.31997807820638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,128,0,1,float16,float16,0,11.899941762288412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,128,0,1,fp8,fp8,0,6.698794682820638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,128,0,1,float16,float16,0,6.620704015096028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,128,0,1,float16,fp8,0,11.027467091878256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,128,0,1,fp8,fp8,0,3.571669260660807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,128,0,1,float16,fp8,0,6.109055836995442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,128,0,1,float16,float16,0,5.825087865193685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,128,0,1,fp8,fp8,0,3.1776320139567056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,128,0,1,float16,fp8,0,5.0199785232543945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,128,0,1,float16,float16,0,5.982767740885417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,128,0,1,fp8,fp8,0,3.1495466232299805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,128,0,1,float16,fp8,0,5.717125574747722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,128,0,1,float16,float16,0,5.35536003112793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,128,0,1,fp8,fp8,0,3.2739359537760415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,128,0,1,float16,float16,0,2.984405199686686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,128,0,1,float16,fp8,0,2.0071040789286294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,128,0,1,fp8,fp8,0,1.7462612787882488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,128,0,1,float16,fp8,0,5.955408096313477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,128,0,1,float16,float16,0,2.586191972096761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,128,0,1,fp8,fp8,0,1.6469066937764485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,128,0,1,float16,fp8,0,2.472533384958903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,128,0,1,float16,float16,0,2.7040106455485025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,128,0,1,float16,fp8,0,2.742543856302897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,128,0,1,fp8,fp8,0,1.6465546290079753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,128,0,1,float16,float16,0,1.984458605448405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,128,0,1,float16,float16,0,1.0874026616414387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,128,0,1,float16,fp8,0,3.0770187377929688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,128,0,1,fp8,fp8,0,1.651290734608968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,128,0,1,float16,fp8,0,1.089744011561076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,128,0,1,fp8,fp8,0,1.2558826605478923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,128,0,1,float16,fp8,0,1.1106986999511719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,128,0,1,fp8,fp8,0,0.9328373273213705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,128,0,1,float16,float16,0,1.1950133641560872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,128,0,1,float16,float16,0,1.062666654586792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,128,0,1,fp8,fp8,0,0.9367252985636393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,128,0,1,float16,fp8,0,1.1206719875335693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,128,0,1,float16,float16,0,1.176810661951701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,128,0,1,float16,fp8,0,1.1564213434855144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,128,0,1,fp8,fp8,0,0.9388533433278402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,128,0,1,fp8,fp8,0,9.062373479207357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,128,0,1,float16,float16,0,14.274463653564453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,128,0,1,float16,float16,0,15.474624633789062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,128,0,1,float16,fp8,0,14.782389322916666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,128,0,1,fp8,fp8,0,8.96444829305013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,128,0,1,float16,fp8,0,15.115472157796225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,128,0,1,float16,float16,0,15.341487884521484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,128,0,1,float16,fp8,0,15.946778615315756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,128,0,1,float16,float16,0,8.18069330851237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,128,0,1,fp8,fp8,0,9.189525604248047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,128,0,1,fp8,fp8,0,5.367136001586914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,128,0,1,float16,fp8,0,8.371221542358398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,128,0,1,float16,fp8,0,7.164096196492513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,128,0,1,float16,float16,0,7.403903961181641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,128,0,1,fp8,fp8,0,4.759210586547852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,128,0,1,fp8,fp8,0,4.783061345418294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,128,0,1,float16,fp8,0,7.32203737894694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,128,0,1,float16,float16,0,7.514240264892578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,128,0,1,float16,float16,0,7.667285283406575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,128,0,1,float16,float16,0,4.161904017130534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,128,0,1,float16,fp8,0,7.506325403849284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,128,0,1,fp8,fp8,0,4.442298571268718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,128,0,1,float16,fp8,0,4.275408109029134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,128,0,1,fp8,fp8,0,2.5478080113728843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,128,0,1,float16,fp8,0,2.731909434000651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,128,0,1,float16,float16,0,3.2541494369506836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,128,0,1,fp8,fp8,0,2.1812960306803384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,128,0,1,float16,float16,0,3.53496519724528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,128,0,1,float16,fp8,0,2.8617226282755532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,128,0,1,fp8,fp8,0,2.2073814074198403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,128,0,1,float16,float16,0,3.4021546045939126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,128,0,1,float16,float16,0,1.368735949198405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,128,0,1,fp8,fp8,0,2.1315627098083496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,128,0,1,fp8,fp8,0,1.2005120118459065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,128,0,1,float16,fp8,0,3.7977333068847656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,128,0,1,float16,fp8,0,1.9755199750264485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,128,0,1,float16,float16,0,1.3753652572631836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,128,0,1,float16,fp8,0,1.313973347345988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,128,0,1,fp8,fp8,0,1.2151467005411785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,128,0,1,float16,fp8,0,1.4074986775716145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,128,0,1,float16,float16,0,1.5858453114827473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,128,0,1,fp8,fp8,0,1.1461439927419026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,128,0,1,float16,float16,0,1.3266826470692952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,128,0,1,float16,fp8,0,1.3384960492451985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,128,0,1,float16,float16,0,0.7688852945963541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,128,0,1,fp8,fp8,0,1.152458667755127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,128,0,1,float16,fp8,0,0.8203252951304117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,128,0,1,fp8,fp8,0,0.7158613204956055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,128,0,1,float16,float16,0,0.7673973242441813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,128,0,1,float16,fp8,0,0.7502240339914957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,128,0,1,fp8,fp8,0,0.6893440087636312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,128,0,1,float16,float16,0,0.7698506514231364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,128,0,1,float16,fp8,0,0.7545386950174967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,128,0,1,fp8,fp8,0,0.6620320081710815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,128,0,1,float16,float16,0,0.7575093110402426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,128,0,1,fp8,fp8,0,0.6735893090566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,128,0,1,float16,fp8,0,0.7604533036549886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,128,0,1,fp8,fp8,0,5.427813212076823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,128,0,1,float16,fp8,0,8.44485346476237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,128,0,1,float16,float16,0,8.901119867960611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,128,0,1,fp8,fp8,0,4.978506724039714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,128,0,1,float16,float16,0,8.704394658406576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,128,0,1,float16,fp8,0,8.99946657816569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,128,0,1,float16,float16,0,9.429279963175455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,128,0,1,fp8,fp8,0,5.771685282389323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,128,0,1,float16,fp8,0,7.796986897786458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,128,0,1,float16,float16,0,5.194234530131022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,128,0,1,float16,fp8,0,4.96995735168457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,128,0,1,fp8,fp8,0,3.0762240091959634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,128,0,1,float16,float16,0,3.629866600036621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,128,0,1,fp8,fp8,0,2.774832089742025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,128,0,1,float16,fp8,0,4.602250734965007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,128,0,1,float16,float16,0,4.240885416666667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,128,0,1,fp8,fp8,0,2.7438879013061523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,128,0,1,float16,fp8,0,3.652005195617676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,128,0,1,float16,float16,0,2.1691306432088218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,128,0,1,float16,float16,0,4.749818801879883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,128,0,1,float16,fp8,0,4.288309415181478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,128,0,1,fp8,fp8,0,2.775210698445638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,128,0,1,fp8,fp8,0,1.5360639890034993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,128,0,1,float16,fp8,0,2.0808800061543784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,128,0,1,float16,fp8,0,1.5846826235453289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,128,0,1,float16,float16,0,2.011173407236735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,128,0,1,fp8,fp8,0,1.319050629933675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,128,0,1,fp8,fp8,0,1.3240373134613037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,128,0,1,float16,fp8,0,1.6146186192830403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,128,0,1,float16,float16,0,1.7261172930399578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,128,0,1,float16,float16,0,1.7865759531656902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,128,0,1,float16,fp8,0,1.5406880378723145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,128,0,1,float16,float16,0,0.9676480293273926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,128,0,1,fp8,fp8,0,1.3347360293070476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,128,0,1,float16,fp8,0,0.9943626721700033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,128,0,1,float16,float16,0,1.1263733704884846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,128,0,1,fp8,fp8,0,1.1148160298665364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,128,0,1,float16,fp8,0,0.8373226324717203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,128,0,1,fp8,fp8,0,0.8607839743296305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,128,0,1,float16,float16,0,0.8369440237681071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,128,0,1,float16,fp8,0,0.8393332958221436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,128,0,1,fp8,fp8,0,0.7339626948038737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,128,0,1,float16,float16,0,0.8468159834543864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,128,0,1,float16,float16,0,0.5046079953511556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,128,0,1,fp8,fp8,0,0.7367786566416422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,128,0,1,float16,fp8,0,0.8417973518371582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,128,0,1,float16,fp8,0,0.5409013430277506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,128,0,1,fp8,fp8,0,0.46038933595021564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,128,0,1,float16,float16,0,0.48467199007670086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,128,0,1,float16,fp8,0,0.4867200056711833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,128,0,1,fp8,fp8,0,0.4387893279393514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,128,0,1,float16,float16,0,0.4891413450241089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,128,0,1,float16,fp8,0,0.48876798152923584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,128,0,1,fp8,fp8,0,0.4352746804555257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,128,0,1,float16,float16,0,0.49615466594696045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,128,0,1,float16,fp8,0,0.4941120147705078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,128,0,1,fp8,fp8,0,0.4403040011723836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,128,0,1,fp8,fp8,0,5.077381451924642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,128,0,1,float16,float16,0,8.783733367919922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,128,0,1,float16,fp8,0,8.39022954305013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,128,0,1,float16,float16,0,8.019877115885416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,128,0,1,float16,fp8,0,7.609077453613281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,128,0,1,fp8,fp8,0,6.007418950398763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,128,0,1,float16,float16,0,9.480538686116537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,128,0,1,float16,fp8,0,7.972053527832031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,128,0,1,float16,float16,0,4.923631985982259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,128,0,1,fp8,fp8,0,5.133520126342773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,128,0,1,fp8,fp8,0,2.9893598556518555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,128,0,1,float16,fp8,0,4.810367902119954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,128,0,1,float16,fp8,0,3.406325340270996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,128,0,1,fp8,fp8,0,2.7389920552571616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,128,0,1,float16,float16,0,4.642362594604492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,128,0,1,float16,float16,0,4.552826563517253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,128,0,1,fp8,fp8,0,2.827429453531901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,128,0,1,float16,fp8,0,4.277066548665364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,128,0,1,float16,float16,0,4.60975456237793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,128,0,1,float16,float16,0,2.196021397908529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,128,0,1,fp8,fp8,0,2.763024012247721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,128,0,1,float16,fp8,0,2.089365323384603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,128,0,1,float16,fp8,0,4.567141215006511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,128,0,1,fp8,fp8,0,1.4578827222188313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,128,0,1,float16,float16,0,1.5517066319783528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,128,0,1,float16,fp8,0,1.9194879531860352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,128,0,1,fp8,fp8,0,1.3437280654907227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,128,0,1,float16,float16,0,1.6705546379089355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,128,0,1,float16,fp8,0,2.0511199633280435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,128,0,1,fp8,fp8,0,1.37990935643514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,128,0,1,float16,float16,0,1.6376427014668782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,128,0,1,float16,fp8,0,1.8497227032979329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,128,0,1,float16,float16,0,0.8624213536580404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,128,0,1,fp8,fp8,0,1.6207787195841472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,128,0,1,float16,fp8,0,0.9367360273996989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,128,0,1,fp8,fp8,0,0.7763360341389974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,128,0,1,float16,fp8,0,0.8144853115081787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,128,0,1,float16,float16,0,0.9418079853057861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,128,0,1,fp8,fp8,0,0.741376002629598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,128,0,1,float16,float16,0,0.8197120030721029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,128,0,1,float16,fp8,0,0.8213973045349121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,128,0,1,fp8,fp8,0,0.7642613252003988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,128,0,1,float16,float16,0,0.8290613492329916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,128,0,1,float16,fp8,0,0.8306346734364828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,128,0,1,float16,float16,0,0.4798826773961385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,128,0,1,fp8,fp8,0,0.803599993387858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,128,0,1,float16,fp8,0,0.4914933443069458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,128,0,1,fp8,fp8,0,0.4358026583989461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,128,0,1,float16,float16,0,0.4564853509267171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,128,0,1,float16,fp8,0,0.45718932151794434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,128,0,1,fp8,fp8,0,0.4033120075861613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,128,0,1,float16,float16,0,0.45837334791819256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,128,0,1,fp8,fp8,0,0.4057653347651164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,128,0,1,float16,fp8,0,0.4609546661376953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,128,0,1,float16,float16,0,0.46108798185984295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,128,0,1,fp8,fp8,0,0.41225067774454754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,128,0,1,float16,fp8,0,0.46242666244506836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,128,0,1,float16,float16,0,0.28964267174402875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,128,0,1,float16,fp8,0,0.29444799820582074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,128,0,1,fp8,fp8,0,0.26766933997472125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,128,0,1,float16,float16,0,0.2720106641451518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,128,0,1,fp8,fp8,0,0.2461386720339457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,128,0,1,float16,fp8,0,0.2730826735496521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,128,0,1,float16,float16,0,0.2739466627438863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,128,0,1,fp8,fp8,0,0.25095999240875244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,128,0,1,float16,fp8,0,0.2743519941965739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,128,0,1,float16,float16,0,0.27527467409769696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,128,0,1,fp8,fp8,0,0.25278933842976886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,128,0,1,float16,fp8,0,0.2766186594963074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,128,0,1,fp8,fp8,0,3.241898536682129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,128,0,1,float16,float16,0,4.925466537475586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,128,0,1,float16,fp8,0,5.071119944254558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,128,0,1,float16,float16,0,5.052559852600098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,128,0,1,float16,fp8,0,4.456277211507161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,128,0,1,fp8,fp8,0,3.258122762044271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,128,0,1,float16,float16,0,5.0692799886067705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,128,0,1,float16,float16,0,2.4864586194356284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,128,0,1,fp8,fp8,0,3.3032585779825845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,128,0,1,float16,fp8,0,5.174720128377278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,128,0,1,float16,fp8,0,2.1014720598856607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,128,0,1,fp8,fp8,0,1.8367466926574707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,128,0,1,float16,float16,0,1.9643893241882324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,128,0,1,float16,fp8,0,2.2625865936279297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,128,0,1,float16,float16,0,1.9680372873942058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,128,0,1,fp8,fp8,0,1.8248319625854492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,128,0,1,float16,fp8,0,1.9230186144510906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,128,0,1,fp8,fp8,0,1.6592267354329426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,128,0,1,float16,float16,0,1.9175200462341309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,128,0,1,float16,fp8,0,2.266757329305013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,128,0,1,fp8,fp8,0,1.6735092798868816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,128,0,1,float16,float16,0,1.0759519735972087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,128,0,1,fp8,fp8,0,1.2211893399556477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,128,0,1,float16,fp8,0,1.3304426670074463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,128,0,1,float16,float16,0,0.98908797899882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,128,0,1,float16,fp8,0,1.0003039836883545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,128,0,1,fp8,fp8,0,0.8675093650817871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,128,0,1,float16,float16,0,1.0758346716562908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,128,0,1,float16,fp8,0,0.9989973704020182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,128,0,1,fp8,fp8,0,0.9832320213317871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,128,0,1,float16,float16,0,1.0007572968800862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,128,0,1,float16,fp8,0,1.008512020111084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,128,0,1,float16,float16,0,0.5774773359298706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,128,0,1,fp8,fp8,0,0.8908106486002604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,128,0,1,float16,fp8,0,0.6666826407114664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,128,0,1,fp8,fp8,0,0.5185546477635702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,128,0,1,float16,float16,0,0.5389493306477865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,128,0,1,float16,fp8,0,0.5591040054957072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,128,0,1,fp8,fp8,0,0.4790453513463338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,128,0,1,float16,float16,0,0.5378506580988566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,128,0,1,float16,fp8,0,0.5424266656239828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,128,0,1,fp8,fp8,0,0.4755466779073079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,128,0,1,float16,float16,0,0.5408693154652914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,128,0,1,float16,fp8,0,0.5475946664810181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,128,0,1,float16,float16,0,0.32738133271535236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,128,0,1,fp8,fp8,0,0.4800106684366862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,128,0,1,float16,fp8,0,0.33353598912556964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,128,0,1,fp8,fp8,0,0.3001599907875061
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,128,0,1,float16,float16,0,0.30499200026194256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,128,0,1,float16,fp8,0,0.3052053252855937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,128,0,1,fp8,fp8,0,0.2739413380622864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,128,0,1,float16,float16,0,0.305184006690979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,128,0,1,fp8,fp8,0,0.2741760015487671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,128,0,1,float16,fp8,0,0.30638933181762695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,128,0,1,float16,float16,0,0.3081066608428955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,128,0,1,float16,float16,0,0.2067626714706421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,128,0,1,float16,fp8,0,0.3097760081291199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,128,0,1,fp8,fp8,0,0.27752532561620075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,128,0,1,float16,fp8,0,0.2061013380686442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,128,0,1,fp8,fp8,0,0.18976000944773355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,128,0,1,float16,float16,0,0.18613332509994507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,128,0,1,float16,fp8,0,0.18917334079742432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,128,0,1,fp8,fp8,0,0.17230933904647827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,128,0,1,float16,float16,0,0.19084266821543375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,128,0,1,float16,fp8,0,0.18940800428390503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,128,0,1,fp8,fp8,0,0.1715786655743917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,128,0,1,float16,float16,0,0.188810666402181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,128,0,1,float16,fp8,0,0.1908479928970337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,128,0,1,fp8,fp8,0,0.17305066188176474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,128,0,1,fp8,fp8,0,3.5867465337117515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,128,0,1,float16,float16,0,5.189125378926595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,128,0,1,float16,fp8,0,5.05186653137207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,128,0,1,float16,float16,0,4.833797454833984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,128,0,1,float16,fp8,0,4.605477333068848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,128,0,1,fp8,fp8,0,3.5993919372558594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,128,0,1,float16,float16,0,5.645962397257487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,128,0,1,float16,fp8,0,4.86843204498291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,128,0,1,float16,float16,0,2.552821318308512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,128,0,1,fp8,fp8,0,3.69976011912028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,128,0,1,float16,fp8,0,2.792501449584961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,128,0,1,fp8,fp8,0,2.0471040407816568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,128,0,1,float16,float16,0,2.1252640088399253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,128,0,1,float16,fp8,0,2.256869316101074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,128,0,1,fp8,fp8,0,1.7894612948099773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,128,0,1,float16,float16,0,2.4413866996765137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,128,0,1,fp8,fp8,0,1.805514653523763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,128,0,1,float16,fp8,0,2.164517402648926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,128,0,1,float16,float16,0,1.1703680356343586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,128,0,1,float16,float16,0,2.083237330118815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,128,0,1,fp8,fp8,0,1.8359519640604656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,128,0,1,float16,fp8,0,2.377247969309489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,128,0,1,fp8,fp8,0,1.0536853472391765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,128,0,1,float16,fp8,0,1.2489439646402996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,128,0,1,float16,float16,0,1.063370704650879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,128,0,1,float16,fp8,0,1.087615966796875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,128,0,1,fp8,fp8,0,0.9247999986012777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,128,0,1,float16,float16,0,1.182751973470052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,128,0,1,float16,fp8,0,1.0710240205128987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,128,0,1,fp8,fp8,0,0.9332693417867025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,128,0,1,float16,float16,0,1.0772586663564045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,128,0,1,float16,float16,0,0.6196426550547282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,128,0,1,float16,fp8,0,1.161621332168579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,128,0,1,fp8,fp8,0,0.9480480353037516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,128,0,1,float16,fp8,0,0.6295946836471558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,128,0,1,fp8,fp8,0,0.5655893484751383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,128,0,1,float16,float16,0,0.5610773166020712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,128,0,1,float16,fp8,0,0.5686773459116617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,128,0,1,fp8,fp8,0,0.49347201983133954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,128,0,1,float16,float16,0,0.5635253190994263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,128,0,1,float16,fp8,0,0.5698399941126505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,128,0,1,fp8,fp8,0,0.4985119899113973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,128,0,1,float16,float16,0,0.5695893367131551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,128,0,1,float16,float16,0,0.3365600109100342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,128,0,1,float16,fp8,0,0.5793226559956869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,128,0,1,fp8,fp8,0,0.501477320988973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,128,0,1,float16,fp8,0,0.34405867258707684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,128,0,1,fp8,fp8,0,0.3073920011520386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,128,0,1,float16,float16,0,0.3099840084711711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,128,0,1,float16,fp8,0,0.30914666255315143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,128,0,1,fp8,fp8,0,0.2752426664034526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,128,0,1,float16,float16,0,0.31067200501759845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,128,0,1,float16,fp8,0,0.3137493332227071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,128,0,1,fp8,fp8,0,0.2770880063374837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,128,0,1,float16,float16,0,0.31386667490005493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,128,0,1,float16,float16,0,0.1985493302345276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,128,0,1,float16,fp8,0,0.3201013406117757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,128,0,1,fp8,fp8,0,0.2831786672274272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,128,0,1,float16,fp8,0,0.20211732387542725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,128,0,1,fp8,fp8,0,0.18266133467356363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,128,0,1,fp8,fp8,0,0.16036267081896463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,128,0,1,float16,float16,0,0.17826666434605917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,128,0,1,float16,fp8,0,0.18013334274291992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,128,0,1,float16,float16,0,0.17966399590174356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,128,0,1,fp8,fp8,0,0.16450666387875876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,128,0,1,float16,fp8,0,0.1794346570968628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,128,0,1,float16,float16,0,0.18074132998784384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,128,0,1,float16,float16,0,0.12322133779525757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,128,0,1,float16,fp8,0,0.12586133678754172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,128,0,1,fp8,fp8,0,0.1688800056775411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,128,0,1,float16,fp8,0,0.18209065993626913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,128,0,1,fp8,fp8,0,0.11950400471687317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,128,0,1,float16,float16,0,0.11757333079973857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,128,0,1,float16,fp8,0,0.11588266491889954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,128,0,1,fp8,fp8,0,0.10730133454004924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,128,0,1,float16,float16,0,0.11719999710718791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,128,0,1,float16,fp8,0,0.11914666493733723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,128,0,1,fp8,fp8,0,0.10780800382296245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,128,0,1,float16,float16,0,0.11551466584205627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,128,0,1,fp8,fp8,0,0.10902933279673259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,128,0,1,float16,fp8,0,0.11787199974060059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,128,0,1,fp8,fp8,0,2.407711982727051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,128,0,1,float16,float16,0,2.74179744720459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,128,0,1,float16,fp8,0,2.873210589090983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,128,0,1,float16,float16,0,3.1121066411336265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,128,0,1,fp8,fp8,0,2.4304000536600747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,128,0,1,float16,fp8,0,2.875359853108724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,128,0,1,float16,float16,0,3.011413256327311
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,128,0,1,float16,fp8,0,2.9113547007242837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,128,0,1,float16,float16,0,1.7262026468912761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,128,0,1,float16,fp8,0,1.5890827178955078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,128,0,1,fp8,fp8,0,2.480031967163086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,128,0,1,fp8,fp8,0,1.4082293510437012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,128,0,1,float16,float16,0,1.3986560503641765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,128,0,1,fp8,fp8,0,1.2155146598815918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,128,0,1,float16,fp8,0,1.4949439366658528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,128,0,1,float16,float16,0,1.4013439814249675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,128,0,1,float16,fp8,0,1.4196799596150715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,128,0,1,fp8,fp8,0,1.229434649149577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,128,0,1,float16,float16,0,1.421845277150472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,128,0,1,float16,fp8,0,1.5010186831156414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,128,0,1,float16,float16,0,0.8081546624501547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,128,0,1,float16,fp8,0,0.8576320012410482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,128,0,1,fp8,fp8,0,1.336949348449707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,128,0,1,fp8,fp8,0,0.7298133373260498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,128,0,1,float16,float16,0,0.7244266668955485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,128,0,1,float16,fp8,0,0.7372159957885742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,128,0,1,fp8,fp8,0,0.6325386762619019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,128,0,1,float16,float16,0,0.7323786417643229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,128,0,1,float16,fp8,0,0.7334506511688232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,128,0,1,fp8,fp8,0,0.641210675239563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,128,0,1,float16,float16,0,0.7405172983805338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,128,0,1,fp8,fp8,0,0.651253342628479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,128,0,1,float16,fp8,0,0.7489866415659586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,128,0,1,float16,float16,0,0.42871467272440594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,128,0,1,float16,fp8,0,0.44096533457438153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,128,0,1,fp8,fp8,0,0.3907839854558309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,128,0,1,float16,float16,0,0.38782934347788495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,128,0,1,fp8,fp8,0,0.34060267607371014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,128,0,1,float16,fp8,0,0.38972266515096027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,128,0,1,float16,float16,0,0.39107731978098553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,128,0,1,fp8,fp8,0,0.3468266725540161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,128,0,1,float16,fp8,0,0.39139731725056964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,128,0,1,float16,float16,0,0.3956906795501709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,128,0,1,float16,fp8,0,0.4010773499806722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,128,0,1,fp8,fp8,0,0.34958934783935547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,128,0,1,float16,float16,0,0.23869333664576212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,128,0,1,fp8,fp8,0,0.21827733516693115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,128,0,1,float16,fp8,0,0.24443199237187704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,128,0,1,float16,float16,0,0.213536004225413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,128,0,1,fp8,fp8,0,0.19390400250752768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,128,0,1,float16,fp8,0,0.21302932500839233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,128,0,1,float16,float16,0,0.21538132429122925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,128,0,1,float16,fp8,0,0.2154080073038737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,128,0,1,fp8,fp8,0,0.19607466459274292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,128,0,1,float16,float16,0,0.21875200668970743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,128,0,1,float16,fp8,0,0.22212799390157065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,128,0,1,fp8,fp8,0,0.19800533850987753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,128,0,1,float16,float16,0,0.14194666345914206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,128,0,1,float16,fp8,0,0.14518400033315024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,128,0,1,fp8,fp8,0,0.13195733229319254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,128,0,1,float16,float16,0,0.1251146694024404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,128,0,1,float16,fp8,0,0.12686933080355325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,128,0,1,fp8,fp8,0,0.11327466368675232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,128,0,1,float16,float16,0,0.12492266297340393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,128,0,1,float16,fp8,0,0.1256586710611979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,128,0,1,fp8,fp8,0,0.11435199777285258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,128,0,1,float16,float16,0,0.12662933270136514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,128,0,1,float16,fp8,0,0.12888532876968384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,128,0,1,float16,float16,0,0.0906880001227061
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,128,0,1,fp8,fp8,0,0.11779200037320454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,128,0,1,float16,fp8,0,0.09092266360918681
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,128,0,1,fp8,fp8,0,0.0869813362757365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,128,0,1,float16,float16,0,0.08692266543706258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,128,0,1,float16,fp8,0,0.08709333340326945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,128,0,1,fp8,fp8,0,0.0823359986146291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,128,0,1,float16,float16,0,0.0865280032157898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,128,0,1,float16,fp8,0,0.08733333150545756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,128,0,1,fp8,fp8,0,0.08091733356316884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,128,0,1,float16,float16,0,0.08742400010426839
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,128,0,1,float16,fp8,0,0.088837335507075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,128,0,1,fp8,fp8,0,0.08241066833337148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,128,0,1,fp8,fp8,0,2.7637974421183267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,128,0,1,float16,float16,0,3.2700745264689126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,128,0,1,float16,fp8,0,3.064917246500651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,128,0,1,float16,float16,0,2.9420159657796225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,128,0,1,float16,fp8,0,2.965002695719401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,128,0,1,fp8,fp8,0,2.930586814880371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,128,0,1,float16,float16,0,3.238154729207357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,128,0,1,float16,fp8,0,3.3216320673624673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,128,0,1,float16,float16,0,1.8596000671386719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,128,0,1,fp8,fp8,0,1.6392159461975098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,128,0,1,float16,fp8,0,1.6898080507914226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,128,0,1,fp8,fp8,0,2.960437456766764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,128,0,1,float16,float16,0,1.4832372665405273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,128,0,1,fp8,fp8,0,1.3724586168924968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,128,0,1,float16,fp8,0,1.4739413261413574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,128,0,1,float16,float16,0,1.4876319567362468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,128,0,1,float16,fp8,0,1.4873439470926921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,128,0,1,fp8,fp8,0,1.399280071258545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,128,0,1,float16,float16,0,1.5206079483032227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,128,0,1,float16,fp8,0,1.5110559463500977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,128,0,1,float16,float16,0,0.8781332969665527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,128,0,1,float16,fp8,0,0.8756106694539388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,128,0,1,fp8,fp8,0,1.4920159975687664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,128,0,1,fp8,fp8,0,0.8335733413696289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,128,0,1,float16,float16,0,0.7555627028147379
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,128,0,1,float16,fp8,0,0.8082613150278727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,128,0,1,fp8,fp8,0,0.6967519919077555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,128,0,1,float16,float16,0,0.7596586545308431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,128,0,1,float16,fp8,0,0.7661866346995035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,128,0,1,fp8,fp8,0,0.7043360074361166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,128,0,1,float16,float16,0,0.7713279724121094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,128,0,1,float16,float16,0,0.4565653403600057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,128,0,1,float16,fp8,0,0.7735253175099691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,128,0,1,fp8,fp8,0,0.7707413037618002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,128,0,1,float16,fp8,0,0.4458133379618327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,128,0,1,fp8,fp8,0,0.4320266644159953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,128,0,1,float16,float16,0,0.39160001277923584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,128,0,1,float16,fp8,0,0.39206401507059735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,128,0,1,fp8,fp8,0,0.35362664858500165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,128,0,1,float16,fp8,0,0.3975199858347575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,128,0,1,float16,float16,0,0.39612265427907306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,128,0,1,fp8,fp8,0,0.36653868357340497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,128,0,1,float16,float16,0,0.40281065305074054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,128,0,1,float16,float16,0,0.24289600054423013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,128,0,1,fp8,fp8,0,0.3718133370081584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,128,0,1,float16,fp8,0,0.4006986618041992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,128,0,1,float16,fp8,0,0.2372586727142334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,128,0,1,float16,float16,0,0.2090346614519755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,128,0,1,fp8,fp8,0,0.22780799865722656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,128,0,1,float16,fp8,0,0.2116960088411967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,128,0,1,fp8,fp8,0,0.18973867098490396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,128,0,1,float16,float16,0,0.21172267198562622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,128,0,1,float16,fp8,0,0.21341333786646524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,128,0,1,fp8,fp8,0,0.19691733519236246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,128,0,1,float16,float16,0,0.21542400121688843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,128,0,1,float16,fp8,0,0.216703991095225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,128,0,1,float16,float16,0,0.1361066699028015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,128,0,1,fp8,fp8,0,0.19969600439071655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,128,0,1,float16,fp8,0,0.13563199838002524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,128,0,1,fp8,fp8,0,0.12956800063451132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,128,0,1,float16,float16,0,0.11449600259462993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,128,0,1,float16,fp8,0,0.11471999684969585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,128,0,1,fp8,fp8,0,0.10354666908582051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,128,0,1,float16,float16,0,0.1151039997736613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,128,0,1,float16,fp8,0,0.11560533444086711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,128,0,1,fp8,fp8,0,0.1076586643854777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,128,0,1,float16,float16,0,0.11779200037320454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,128,0,1,float16,fp8,0,0.11732266346613567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,128,0,1,fp8,fp8,0,0.11216533184051514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,128,0,1,float16,float16,0,0.07421333094437917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,128,0,1,float16,fp8,0,0.07507733503977458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,128,0,1,fp8,fp8,0,0.07670933504899342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,128,0,1,float16,float16,0,0.06843733290831248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,128,0,1,float16,fp8,0,0.06804800033569336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,128,0,1,float16,float16,0,0.0682239979505539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,128,0,1,fp8,fp8,0,0.06252266466617584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,128,0,1,float16,fp8,0,0.06841066479682922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,128,0,1,fp8,fp8,0,0.060506666700045265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,128,0,1,float16,float16,0,0.06857599814732869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,128,0,1,float16,fp8,0,0.06814933319886525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,128,0,1,float16,float16,0,0.0499893327554067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,128,0,1,fp8,fp8,0,0.06124266485373179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,128,0,1,float16,fp8,0,0.0484799991051356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,128,0,1,fp8,fp8,0,0.044106667240460716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,128,0,1,float16,float16,0,0.048063998421033226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,128,0,1,float16,fp8,0,0.04744000236193339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,128,0,1,fp8,fp8,0,0.042352000872294106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,128,0,1,float16,float16,0,0.04834666848182678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,128,0,1,float16,fp8,0,0.04784533381462097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,128,0,1,fp8,fp8,0,0.04338666796684265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,128,0,1,float16,float16,0,0.047781333327293396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,128,0,1,float16,fp8,0,0.048138668139775596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,128,0,1,fp8,fp8,0,0.04358399907747904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,128,0,1,float16,float16,0,2.48634672164917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,128,0,1,fp8,fp8,0,2.3892480532328286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,128,0,1,float16,fp8,0,2.4837759335835776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,128,0,1,float16,float16,0,2.5417280197143555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,128,0,1,float16,fp8,0,2.5131200154622397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,128,0,1,fp8,fp8,0,2.581567923227946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,128,0,1,float16,float16,0,2.7383626302083335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,128,0,1,float16,float16,0,1.489733378092448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,128,0,1,float16,fp8,0,1.4905813535054524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,128,0,1,float16,fp8,0,2.7141332626342773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,128,0,1,fp8,fp8,0,2.5926027297973633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,128,0,1,fp8,fp8,0,1.4561120669047039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,128,0,1,float16,float16,0,1.2573440074920654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,128,0,1,float16,fp8,0,1.2553280194600422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,128,0,1,fp8,fp8,0,1.1819519996643066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,128,0,1,float16,fp8,0,1.2767253716786702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,128,0,1,float16,float16,0,1.2706987063090007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,128,0,1,fp8,fp8,0,1.1931040287017822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,128,0,1,float16,float16,0,1.3090186913808186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,128,0,1,float16,float16,0,0.7636053562164307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,128,0,1,float16,fp8,0,1.2969706853230794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,128,0,1,fp8,fp8,0,1.3120053609212239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,128,0,1,float16,fp8,0,0.7434773445129395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,128,0,1,fp8,fp8,0,0.7346346378326416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,128,0,1,float16,float16,0,0.6407519976298014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,128,0,1,float16,fp8,0,0.6404266754786173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,128,0,1,fp8,fp8,0,0.6005173524220785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,128,0,1,float16,float16,0,0.6506506601969401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,128,0,1,float16,fp8,0,0.652949333190918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,128,0,1,fp8,fp8,0,0.61134934425354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,128,0,1,float16,float16,0,0.6594400008519491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,128,0,1,float16,float16,0,0.3961333433787028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,128,0,1,float16,fp8,0,0.6573760112126669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,128,0,1,float16,fp8,0,0.3863146702448527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,128,0,1,fp8,fp8,0,0.6245280106862386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,128,0,1,fp8,fp8,0,0.3798666795094808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,128,0,1,float16,float16,0,0.3314453363418579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,128,0,1,float16,fp8,0,0.3320479989051819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,128,0,1,fp8,fp8,0,0.30372800429662067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,128,0,1,float16,float16,0,0.3392426570256551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,128,0,1,fp8,fp8,0,0.31350932518641156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,128,0,1,float16,fp8,0,0.33961065610249835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,128,0,1,float16,float16,0,0.3436319828033447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,128,0,1,float16,float16,0,0.2130133310953776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,128,0,1,float16,fp8,0,0.20666666825612387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,128,0,1,fp8,fp8,0,0.32062933842341107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,128,0,1,float16,fp8,0,0.34242133299509686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,128,0,1,fp8,fp8,0,0.2017973264058431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,128,0,1,float16,float16,0,0.17769600947697958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,128,0,1,float16,fp8,0,0.17804267009099325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,128,0,1,fp8,fp8,0,0.16356266538302103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,128,0,1,float16,float16,0,0.18152000506718954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,128,0,1,float16,fp8,0,0.18178667624791464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,128,0,1,fp8,fp8,0,0.16990933815638223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,128,0,1,float16,float16,0,0.18448533614476523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,128,0,1,float16,fp8,0,0.18384534120559692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,128,0,1,fp8,fp8,0,0.17335466543833414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,128,0,1,float16,float16,0,0.1170240044593811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,128,0,1,float16,fp8,0,0.11460799972216289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,128,0,1,fp8,fp8,0,0.11427199840545654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,128,0,1,float16,float16,0,0.09542933106422424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,128,0,1,float16,fp8,0,0.09578133622805278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,128,0,1,fp8,fp8,0,0.08717333277066548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,128,0,1,float16,float16,0,0.09585066636403401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,128,0,1,float16,fp8,0,0.09694400429725647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,128,0,1,fp8,fp8,0,0.09126399954160054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,128,0,1,float16,float16,0,0.10050666332244873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,128,0,1,float16,fp8,0,0.09899733463923137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,128,0,1,fp8,fp8,0,0.09601599971453349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,128,0,1,float16,float16,0,0.06644266843795776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,128,0,1,float16,fp8,0,0.06522666911284129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,128,0,1,fp8,fp8,0,0.06675733129183452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,128,0,1,float16,float16,0,0.05820799867312113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,128,0,1,float16,fp8,0,0.059808000922203064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,128,0,1,fp8,fp8,0,0.051914667089780174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,128,0,1,float16,float16,0,0.0603359987338384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,128,0,1,float16,fp8,0,0.05880533158779144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,128,0,1,fp8,fp8,0,0.05197333296140035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,128,0,1,float16,float16,0,0.06012799839178721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,128,0,1,float16,fp8,0,0.060133333007494606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,128,0,1,fp8,fp8,0,0.053599998354911804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,128,0,1,float16,float16,0,0.042405332128206887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,128,0,1,float16,fp8,0,0.043882668018341064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,128,0,1,fp8,fp8,0,0.0397119993964831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,128,0,1,float16,float16,0,0.04161066561937332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,128,0,1,float16,fp8,0,0.04204266766707102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,128,0,1,fp8,fp8,0,0.03762666632731756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,128,0,1,float16,float16,0,0.04181866844495138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,128,0,1,float16,fp8,0,0.04185600082079569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,128,0,1,fp8,fp8,0,0.03893866638342539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,128,0,1,float16,float16,0,0.04307733476161957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,128,0,1,float16,fp8,0,0.041850666205088295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,128,0,1,fp8,fp8,0,0.039488000174363456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,128,0,1,float16,float16,0,0.029696000119050343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,128,0,1,float16,fp8,0,0.03046933313210805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,128,0,1,fp8,fp8,0,0.0295413335164388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,128,0,1,float16,float16,0,0.029232000311215717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,128,0,1,float16,fp8,0,0.029445332785447437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,128,0,1,fp8,fp8,0,0.02736533433198929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,128,0,1,float16,float16,0,0.02937600016593933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,128,0,1,float16,fp8,0,0.02956266701221466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,128,0,1,fp8,fp8,0,0.02733866622050603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,128,0,1,float16,float16,0,0.029215998947620392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,128,0,1,float16,fp8,0,0.03150933235883713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,128,0,1,fp8,fp8,0,0.02757333219051361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,128,0,1,float16,float16,0,1.1295733451843262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,128,0,1,fp8,fp8,0,1.0760640303293865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,128,0,1,float16,fp8,0,1.1280799706776936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,128,0,1,fp8,fp8,0,1.1464426517486572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,128,0,1,float16,float16,0,1.1449493567148845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,128,0,1,float16,fp8,0,1.1461280186971028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,128,0,1,float16,float16,0,0.7009759744008383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,128,0,1,float16,fp8,0,1.1660479704538982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,128,0,1,fp8,fp8,0,1.2092959880828857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,128,0,1,float16,float16,0,1.1864960193634033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,128,0,1,float16,fp8,0,0.6799519856770834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,128,0,1,float16,float16,0,0.571071982383728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,128,0,1,fp8,fp8,0,0.68122665087382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,128,0,1,float16,fp8,0,0.5698080062866211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,128,0,1,fp8,fp8,0,0.5444373289744059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,128,0,1,float16,float16,0,0.5800533294677734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,128,0,1,float16,fp8,0,0.5806986490885416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,128,0,1,fp8,fp8,0,0.5515466531117758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,128,0,1,float16,float16,0,0.5959680080413818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,128,0,1,float16,float16,0,0.3610080083211263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,128,0,1,float16,fp8,0,0.5906346638997396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,128,0,1,float16,fp8,0,0.35326401392618817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,128,0,1,fp8,fp8,0,0.5659253199895223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,128,0,1,float16,float16,0,0.2946773370107015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,128,0,1,fp8,fp8,0,0.35186131795247394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,128,0,1,float16,fp8,0,0.2964586615562439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,128,0,1,fp8,fp8,0,0.2728853424390157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,128,0,1,float16,float16,0,0.3012746572494507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,128,0,1,float16,fp8,0,0.3006773392359416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,128,0,1,fp8,fp8,0,0.2845653295516968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,128,0,1,float16,float16,0,0.30664000908533734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,128,0,1,float16,float16,0,0.19450666507085165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,128,0,1,float16,fp8,0,0.18794665733973184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,128,0,1,float16,fp8,0,0.3069546620051066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,128,0,1,fp8,fp8,0,0.2906613349914551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,128,0,1,float16,float16,0,0.1588373382886251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,128,0,1,fp8,fp8,0,0.1872319976488749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,128,0,1,float16,fp8,0,0.16062399744987488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,128,0,1,fp8,fp8,0,0.14621333281199136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,128,0,1,float16,float16,0,0.16299733519554138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,128,0,1,float16,fp8,0,0.16292799512545267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,128,0,1,fp8,fp8,0,0.15236799915631613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,128,0,1,float16,float16,0,0.16450132926305136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,128,0,1,float16,float16,0,0.10938133796056111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,128,0,1,float16,fp8,0,0.16668800512949625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,128,0,1,fp8,fp8,0,0.1564959983030955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,128,0,1,float16,fp8,0,0.10582933823267619
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,128,0,1,fp8,fp8,0,0.10752000411351521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,128,0,1,float16,float16,0,0.08919466535250346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,128,0,1,float16,fp8,0,0.08984532952308655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,128,0,1,fp8,fp8,0,0.08088533580303192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,128,0,1,float16,float16,0,0.09079466263453166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,128,0,1,float16,fp8,0,0.09142399827639262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,128,0,1,fp8,fp8,0,0.08475200335184734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,128,0,1,float16,float16,0,0.09314666191736858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,128,0,1,float16,fp8,0,0.0932373305161794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,128,0,1,fp8,fp8,0,0.08901333808898926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,128,0,1,float16,float16,0,0.0628959983587265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,128,0,1,float16,fp8,0,0.06226666768391927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,128,0,1,fp8,fp8,0,0.06284800171852112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,128,0,1,float16,float16,0,0.05495466788609823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,128,0,1,float16,fp8,0,0.054933334390322365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,128,0,1,fp8,fp8,0,0.04781333108743032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,128,0,1,float16,float16,0,0.055434669057528176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,128,0,1,float16,fp8,0,0.05563733478387197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,128,0,1,fp8,fp8,0,0.049866666396458946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,128,0,1,float16,float16,0,0.05670933425426483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,128,0,1,float16,fp8,0,0.05646933118502299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,128,0,1,fp8,fp8,0,0.049882665276527405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,128,0,1,float16,float16,0,0.0397173340121905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,128,0,1,float16,fp8,0,0.03993066648642222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,128,0,1,fp8,fp8,0,0.037647999823093414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,128,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,128,0,1,float16,fp8,0,0.03759466608365377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,128,0,1,fp8,fp8,0,0.035487999518712364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,128,0,1,float16,float16,0,0.03770666569471359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,128,0,1,float16,fp8,0,0.03765333443880081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,128,0,1,fp8,fp8,0,0.035717333356539406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,128,0,1,float16,float16,0,0.03842133283615112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,128,0,1,float16,fp8,0,0.03789866715669632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,128,0,1,fp8,fp8,0,0.03649600098530451
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,128,0,1,float16,float16,0,0.02756800005833308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,128,0,1,float16,fp8,0,0.027327999472618103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,128,0,1,fp8,fp8,0,0.025455998877684276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,128,0,1,float16,fp8,0,0.02739199995994568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,128,0,1,float16,float16,0,0.02757866680622101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,128,0,1,fp8,fp8,0,0.02521066615978877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,128,0,1,float16,float16,0,0.027386667827765148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,128,0,1,float16,fp8,0,0.027535999814669292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,128,0,1,fp8,fp8,0,0.025546667476495106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,128,0,1,float16,float16,0,0.027450665831565857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,128,0,1,float16,fp8,0,0.027493332823117573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,128,0,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,128,0,1,float16,float16,0,0.025455998877684276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,128,0,1,float16,fp8,0,0.025216000775496166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,128,0,1,fp8,fp8,0,0.02327999969323476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,128,0,1,float16,float16,0,0.02327466756105423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,128,0,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,128,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,128,0,1,float16,float16,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,128,0,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,128,0,1,fp8,fp8,0,0.021407999098300934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,128,0,1,float16,float16,0,0.023434666295846302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,128,0,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,128,0,1,fp8,fp8,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,128,0,1,float16,float16,0,0.6175306638081869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,128,0,1,fp8,fp8,0,0.5946346521377563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,128,0,1,float16,fp8,0,0.6190666755040487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,128,0,1,float16,float16,0,0.6271786689758301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,128,0,1,fp8,fp8,0,0.6021600166956583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,128,0,1,float16,fp8,0,0.6293866634368896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,128,0,1,float16,float16,0,0.6416053374608358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,128,0,1,fp8,fp8,0,0.6153866847356161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,128,0,1,float16,fp8,0,0.6398773193359375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,128,0,1,float16,float16,0,0.3786506652832031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,128,0,1,float16,fp8,0,0.3710879882176717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,128,0,1,fp8,fp8,0,0.37509334087371826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,128,0,1,float16,float16,0,0.3185759981473287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,128,0,1,float16,fp8,0,0.3171360095342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,128,0,1,fp8,fp8,0,0.2977813283602397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,128,0,1,float16,float16,0,0.32363732655843097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,128,0,1,float16,fp8,0,0.32452799876530963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,128,0,1,fp8,fp8,0,0.307151993115743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,128,0,1,float16,float16,0,0.3288480043411255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,128,0,1,float16,fp8,0,0.3272426724433899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,128,0,1,fp8,fp8,0,0.3158453305562337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,128,0,1,float16,float16,0,0.19899733861287436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,128,0,1,float16,fp8,0,0.19485332568486533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,128,0,1,fp8,fp8,0,0.19511467218399048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,128,0,1,float16,fp8,0,0.16802134116490683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,128,0,1,float16,float16,0,0.1672746737798055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,128,0,1,fp8,fp8,0,0.1590720017751058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,128,0,1,float16,float16,0,0.16965333620707193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,128,0,1,float16,fp8,0,0.17099199692408243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,128,0,1,float16,float16,0,0.17410133282343546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,128,0,1,fp8,fp8,0,0.16354667147000632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,128,0,1,float16,fp8,0,0.17316800355911255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,128,0,1,fp8,fp8,0,0.16571733355522156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,128,0,1,float16,float16,0,0.10959466298421223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,128,0,1,float16,fp8,0,0.10813333590825398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,128,0,1,fp8,fp8,0,0.11036266883214314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,128,0,1,float16,float16,0,0.0909493366877238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,128,0,1,float16,fp8,0,0.09147733449935913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,128,0,1,float16,float16,0,0.0913706620534261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,128,0,1,fp8,fp8,0,0.0851093331972758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,128,0,1,float16,fp8,0,0.09314666191736858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,128,0,1,fp8,fp8,0,0.08825600147247314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,128,0,1,float16,float16,0,0.09386666615804036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,128,0,1,float16,fp8,0,0.09463999668757121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,128,0,1,float16,float16,0,0.061119998494784035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,128,0,1,fp8,fp8,0,0.093231995900472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,128,0,1,float16,fp8,0,0.05936533212661743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,128,0,1,fp8,fp8,0,0.06411199768384297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,128,0,1,float16,float16,0,0.054154664278030396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,128,0,1,float16,fp8,0,0.053898667295773826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,128,0,1,fp8,fp8,0,0.0497920016447703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,128,0,1,float16,float16,0,0.05540800094604492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,128,0,1,float16,fp8,0,0.05385600030422211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,128,0,1,fp8,fp8,0,0.04985600213209788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,128,0,1,float16,float16,0,0.055685331424077354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,128,0,1,float16,fp8,0,0.05399466554323832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,128,0,1,fp8,fp8,0,0.05020266771316528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,128,0,1,float16,float16,0,0.037434667348861694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,128,0,1,float16,fp8,0,0.037871999045213066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,128,0,1,fp8,fp8,0,0.03595733394225439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,128,0,1,float16,float16,0,0.03554133325815201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,128,0,1,float16,fp8,0,0.03666666646798452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,128,0,1,fp8,fp8,0,0.03369600077470144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,128,0,1,float16,float16,0,0.035616000493367515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,128,0,1,float16,fp8,0,0.035749333600203194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,128,0,1,fp8,fp8,0,0.03385599950949351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,128,0,1,float16,float16,0,0.03606399893760681
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,128,0,1,float16,fp8,0,0.0360000009338061
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,128,0,1,fp8,fp8,0,0.03356799980004629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,128,0,1,float16,float16,0,0.028143999477227528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,128,0,1,float16,fp8,0,0.02868266652027766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,128,0,1,fp8,fp8,0,0.02716800073782603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,128,0,1,float16,float16,0,0.027280000348885853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,128,0,1,float16,fp8,0,0.027301333844661713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,128,0,1,fp8,fp8,0,0.025285333395004272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,128,0,1,float16,float16,0,0.027402666707833607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,128,0,1,float16,fp8,0,0.02769600103298823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,128,0,1,fp8,fp8,0,0.025600001215934753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,128,0,1,float16,float16,0,0.027471999327341717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,128,0,1,float16,fp8,0,0.027450665831565857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,128,0,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,128,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,128,0,1,float16,fp8,0,0.021269333859284718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,128,0,1,fp8,fp8,0,0.020949333906173706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,128,0,1,float16,float16,0,0.019573333362738293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,128,0,1,float16,fp8,0,0.02048533285657565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,128,0,1,fp8,fp8,0,0.019487999379634857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,128,0,1,float16,float16,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,128,0,1,float16,fp8,0,0.02013333390156428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,128,0,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,128,0,1,float16,float16,0,0.021295999487241108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,128,0,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,128,0,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,128,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,128,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,128,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,128,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,128,0,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,128,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,128,0,1,float16,float16,0,0.018917333334684372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,128,0,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,128,0,1,float16,float16,0,0.019253333409627277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,128,0,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,128,0,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,128,0,1,float16,float16,0,0.40248000621795654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,128,0,1,fp8,fp8,0,0.3798559904098511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,128,0,1,float16,fp8,0,0.3999893267949422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,128,0,1,float16,float16,0,0.4084800084431966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,128,0,1,fp8,fp8,0,0.38861334323883057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,128,0,1,float16,fp8,0,0.4068640073140462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,128,0,1,float16,float16,0,0.41021867593129474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,128,0,1,fp8,fp8,0,0.3954079945882161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,128,0,1,float16,fp8,0,0.4103360176086426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,128,0,1,float16,fp8,0,0.23667200406392416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,128,0,1,float16,float16,0,0.210314671198527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,128,0,1,fp8,fp8,0,0.2373973329861959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,128,0,1,float16,float16,0,0.24155199527740479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,128,0,1,float16,fp8,0,0.21100799242655435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,128,0,1,fp8,fp8,0,0.19774399201075235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,128,0,1,float16,float16,0,0.2129866679509481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,128,0,1,float16,fp8,0,0.21358400583267212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,128,0,1,fp8,fp8,0,0.20359466473261514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,128,0,1,float16,float16,0,0.21253865957260132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,128,0,1,float16,fp8,0,0.2154560089111328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,128,0,1,fp8,fp8,0,0.20675732692082724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,128,0,1,float16,float16,0,0.1281546652317047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,128,0,1,float16,fp8,0,0.12551466623942056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,128,0,1,fp8,fp8,0,0.12997866670290628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,128,0,1,float16,fp8,0,0.11196266611417134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,128,0,1,float16,float16,0,0.1120799978574117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,128,0,1,fp8,fp8,0,0.10342933734258015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,128,0,1,float16,float16,0,0.11193066835403442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,128,0,1,float16,fp8,0,0.11215466260910034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,128,0,1,fp8,fp8,0,0.1069546639919281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,128,0,1,float16,float16,0,0.11343466242154439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,128,0,1,float16,fp8,0,0.11385066310564677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,128,0,1,float16,float16,0,0.07041599849859874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,128,0,1,float16,fp8,0,0.06857066849867503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,128,0,1,fp8,fp8,0,0.07423466444015503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,128,0,1,fp8,fp8,0,0.11153067151705424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,128,0,1,float16,float16,0,0.06427200138568878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,128,0,1,float16,fp8,0,0.06443733473618825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,128,0,1,fp8,fp8,0,0.05974400043487549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,128,0,1,float16,float16,0,0.0643039991458257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,128,0,1,float16,fp8,0,0.06436266501744588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,128,0,1,fp8,fp8,0,0.05995733539263407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,128,0,1,float16,float16,0,0.06438399851322174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,128,0,1,float16,fp8,0,0.06398933132489522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,128,0,1,fp8,fp8,0,0.0603359987338384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,128,0,1,float16,float16,0,0.041589332123597465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,128,0,1,float16,fp8,0,0.041877334316571556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,128,0,1,fp8,fp8,0,0.039701332648595176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,128,0,1,float16,float16,0,0.03967999915281931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,128,0,1,float16,fp8,0,0.03942399968703588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,128,0,1,fp8,fp8,0,0.037717332442601524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,128,0,1,float16,float16,0,0.03976000100374222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,128,0,1,float16,fp8,0,0.03957333415746689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,128,0,1,fp8,fp8,0,0.03774933268626531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,128,0,1,float16,float16,0,0.041946664452552795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,128,0,1,float16,fp8,0,0.04172799984614054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,128,0,1,fp8,fp8,0,0.03965866565704346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,128,0,1,float16,float16,0,0.029658667743206024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,128,0,1,float16,fp8,0,0.02959466725587845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,128,0,1,fp8,fp8,0,0.029306667546431225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,128,0,1,float16,float16,0,0.029343999922275543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,128,0,1,float16,fp8,0,0.02757866680622101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,128,0,1,fp8,fp8,0,0.027589333554108936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,128,0,1,float16,float16,0,0.029306667546431225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,128,0,1,float16,fp8,0,0.027818667391935985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,128,0,1,fp8,fp8,0,0.027317332724730175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,128,0,1,float16,float16,0,0.029194665451844532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,128,0,1,float16,fp8,0,0.02926933268706004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,128,0,1,fp8,fp8,0,0.027269333600997925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,128,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,128,0,1,float16,fp8,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,128,0,1,fp8,fp8,0,0.02346666653951009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,128,0,1,float16,float16,0,0.023120000958442688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,128,0,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,128,0,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,128,0,1,float16,float16,0,0.023333333432674408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,128,0,1,float16,fp8,0,0.023311999936898548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,128,0,1,fp8,fp8,0,0.023045333723227184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,128,0,1,float16,float16,0,0.023029332359631855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,128,0,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,128,0,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,128,0,1,float16,float16,0,0.01756799966096878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,128,0,1,float16,fp8,0,0.017530667285124462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,128,0,1,float16,float16,0,0.017386666188637417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,128,0,1,float16,float16,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,128,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,128,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,128,0,1,float16,fp8,0,0.018858666221300762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,128,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,128,0,1,float16,float16,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,128,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,128,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,128,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,128,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,128,0,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,128,0,1,float16,fp8,0,0.01844266677896182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,128,0,1,float16,fp8,0,0.017994667092959087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,128,0,1,float16,float16,0,0.2935466567675273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,128,0,1,fp8,fp8,0,0.28146666288375854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,128,0,1,float16,fp8,0,0.2959306637446086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,128,0,1,float16,float16,0,0.29690666993459064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,128,0,1,fp8,fp8,0,0.2864053249359131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,128,0,1,float16,fp8,0,0.2977013389269511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,128,0,1,float16,float16,0,0.2992960015932719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,128,0,1,float16,fp8,0,0.30135466655095416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,128,0,1,fp8,fp8,0,0.288154661655426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,128,0,1,float16,float16,0,0.17150932550430298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,128,0,1,float16,fp8,0,0.17095466454823813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,128,0,1,fp8,fp8,0,0.16990399360656738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,128,0,1,float16,float16,0,0.15500799814860025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,128,0,1,fp8,fp8,0,0.14468266566594443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,128,0,1,float16,float16,0,0.15667733550071716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,128,0,1,float16,fp8,0,0.15820266803105673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,128,0,1,float16,fp8,0,0.15545599659283957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,128,0,1,fp8,fp8,0,0.14800000190734863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,128,0,1,float16,float16,0,0.15772799650828043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,128,0,1,fp8,fp8,0,0.15131200353304544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,128,0,1,float16,fp8,0,0.15925332903862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,128,0,1,float16,float16,0,0.08973866701126099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,128,0,1,float16,fp8,0,0.08948266506195068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,128,0,1,fp8,fp8,0,0.09290666381518047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,128,0,1,float16,fp8,0,0.0830080012480418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,128,0,1,float16,float16,0,0.08344533046086629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,128,0,1,fp8,fp8,0,0.0787360022465388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,128,0,1,float16,float16,0,0.08408000071843465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,128,0,1,float16,fp8,0,0.08515733480453491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,128,0,1,float16,float16,0,0.08328000207742055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,128,0,1,fp8,fp8,0,0.07935466865698497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,128,0,1,float16,fp8,0,0.08469866712888081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,128,0,1,fp8,fp8,0,0.08072000245253245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,128,0,1,float16,float16,0,0.05021866659323374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,128,0,1,float16,fp8,0,0.05036800106366476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,128,0,1,fp8,fp8,0,0.05065066615740458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,128,0,1,float16,float16,0,0.05026666820049286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,128,0,1,float16,fp8,0,0.049829334020614624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,128,0,1,fp8,fp8,0,0.04827733337879181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,128,0,1,float16,float16,0,0.05037866532802582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,128,0,1,float16,fp8,0,0.05019199848175049
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,128,0,1,fp8,fp8,0,0.04765866696834564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,128,0,1,float16,float16,0,0.05150400102138519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,128,0,1,float16,fp8,0,0.05040533343950907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,128,0,1,fp8,fp8,0,0.04781333108743032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,128,0,1,float16,float16,0,0.0336053321758906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,128,0,1,float16,fp8,0,0.033413333197434746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,128,0,1,fp8,fp8,0,0.03345600018898646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,128,0,1,float16,float16,0,0.03251733382542928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,128,0,1,float16,fp8,0,0.03385066737731298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,128,0,1,fp8,fp8,0,0.03169066707293192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,128,0,1,float16,float16,0,0.032144000132878624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,128,0,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,128,0,1,fp8,fp8,0,0.03350399931271871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,128,0,1,float16,float16,0,0.033733333150545754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,128,0,1,float16,fp8,0,0.03328000009059906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,128,0,1,fp8,fp8,0,0.03328000009059906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,128,0,1,float16,fp8,0,0.025274666647116344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,128,0,1,float16,float16,0,0.025386666258176167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,128,0,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,128,0,1,float16,float16,0,0.025114665428797405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,128,0,1,float16,fp8,0,0.025562666356563568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,128,0,1,fp8,fp8,0,0.023546665906906128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,128,0,1,float16,float16,0,0.02362666775782903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,128,0,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,128,0,1,fp8,fp8,0,0.025077333052953083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,128,0,1,float16,float16,0,0.02480533222357432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,128,0,1,float16,fp8,0,0.02532266577084859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,128,0,1,fp8,fp8,0,0.025498665869235992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,128,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,128,0,1,float16,fp8,0,0.020117333779732387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,128,0,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,128,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,128,0,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,128,0,1,fp8,fp8,0,0.019754666835069656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,128,0,1,float16,float16,0,0.02024000013868014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,128,0,1,float16,fp8,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,128,0,1,fp8,fp8,0,0.018837332725524902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,128,0,1,float16,float16,0,0.020341333001852036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,128,0,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,128,0,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,128,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,128,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,128,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,128,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,128,0,1,float16,fp8,0,0.01773333301146825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,128,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,128,0,1,float16,float16,0,0.01573333392540614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,128,0,1,float16,fp8,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,128,0,1,float16,float16,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,128,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,128,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,128,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,128,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,128,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,128,0,1,float16,float16,0,0.24092799425125122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,128,0,1,float16,fp8,0,0.24226667483647665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,128,0,1,fp8,fp8,0,0.2332586646080017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,128,0,1,float16,float16,0,0.24169600009918213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,128,0,1,fp8,fp8,0,0.2349546750386556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,128,0,1,float16,fp8,0,0.2428320050239563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,128,0,1,float16,float16,0,0.243450661500295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,128,0,1,float16,fp8,0,0.24401599168777466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,128,0,1,fp8,fp8,0,0.23841599623362222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,128,0,1,float16,float16,0,0.13368533054987589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,128,0,1,float16,fp8,0,0.13545067111651102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,128,0,1,fp8,fp8,0,0.1362879971663157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,128,0,1,float16,float16,0,0.1293706695238749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,128,0,1,float16,fp8,0,0.1281013290087382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,128,0,1,fp8,fp8,0,0.12179733316103618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,128,0,1,float16,float16,0,0.12728533148765564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,128,0,1,float16,fp8,0,0.12612266341845194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,128,0,1,fp8,fp8,0,0.12241599957148235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,128,0,1,float16,float16,0,0.12804266810417175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,128,0,1,float16,float16,0,0.07255466779073079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,128,0,1,float16,fp8,0,0.1302079955736796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,128,0,1,float16,fp8,0,0.07233066856861115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,128,0,1,fp8,fp8,0,0.12215466300646464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,128,0,1,fp8,fp8,0,0.0705386648575465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,128,0,1,float16,fp8,0,0.07101866602897644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,128,0,1,fp8,fp8,0,0.06843199829260509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,128,0,1,float16,float16,0,0.0711839993794759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,128,0,1,float16,float16,0,0.07141333321730296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,128,0,1,float16,fp8,0,0.07026666899522145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,128,0,1,fp8,fp8,0,0.070783997575442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,128,0,1,float16,float16,0,0.0719946672519048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,128,0,1,float16,fp8,0,0.07057066758473714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,128,0,1,fp8,fp8,0,0.07074666519959767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,128,0,1,float16,float16,0,0.044400001565615334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,128,0,1,float16,fp8,0,0.04367466767628988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,128,0,1,fp8,fp8,0,0.043935999274253845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,128,0,1,float16,fp8,0,0.04399466514587402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,128,0,1,float16,float16,0,0.04242133100827535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,128,0,1,fp8,fp8,0,0.04379733403523763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,128,0,1,float16,float16,0,0.043653334180514015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,128,0,1,float16,fp8,0,0.04386133452256521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,128,0,1,fp8,fp8,0,0.04165333261092504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,128,0,1,float16,float16,0,0.043663998444875084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,128,0,1,float16,fp8,0,0.044031997521718345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,128,0,1,float16,float16,0,0.029290666182835896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,128,0,1,fp8,fp8,0,0.0420959989229838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,128,0,1,float16,fp8,0,0.029904000461101532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,128,0,1,fp8,fp8,0,0.02975466599067052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,128,0,1,float16,float16,0,0.029343999922275543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,128,0,1,float16,fp8,0,0.030373332401116688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,128,0,1,fp8,fp8,0,0.029637334247430164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,128,0,1,float16,float16,0,0.02938133229811986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,128,0,1,float16,fp8,0,0.030960001051425934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,128,0,1,fp8,fp8,0,0.029648000995318096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,128,0,1,float16,float16,0,0.029743999242782593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,128,0,1,float16,fp8,0,0.029520000020662945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,128,0,1,fp8,fp8,0,0.029205332199732464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,128,0,1,float16,float16,0,0.02309333284695943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,128,0,1,float16,float16,0,0.021381333470344543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,128,0,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,128,0,1,fp8,fp8,0,0.02242133269707362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,128,0,1,float16,fp8,0,0.021781332790851593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,128,0,1,fp8,fp8,0,0.023354666928450268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,128,0,1,float16,float16,0,0.021253332495689392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,128,0,1,float16,fp8,0,0.023045333723227184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,128,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,128,0,1,float16,float16,0,0.021482666333516438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,128,0,1,float16,fp8,0,0.02313599983851115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,128,0,1,fp8,fp8,0,0.022965334355831146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,128,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,128,0,1,float16,fp8,0,0.019424000134070713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,128,0,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,128,0,1,float16,float16,0,0.019018666197856266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,128,0,1,float16,fp8,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,128,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,128,0,1,float16,float16,0,0.019120000302791595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,128,0,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,128,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,128,0,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,128,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,128,0,1,float16,float16,0,0.016234666109085083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,128,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,128,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,128,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,128,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,128,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,128,0,1,float16,float16,0,0.015418666104475657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,128,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,128,0,1,float16,float16,0,0.015568000574906668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,128,0,1,float16,float16,0,0.20232532421747842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,128,0,1,fp8,fp8,0,0.19173866510391235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,128,0,1,float16,fp8,0,0.20278932650883993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,128,0,1,float16,float16,0,0.20224533478418985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,128,0,1,float16,fp8,0,0.20332799355189005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,128,0,1,fp8,fp8,0,0.1914666692415873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,128,0,1,float16,float16,0,0.2022613286972046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,128,0,1,float16,fp8,0,0.20203733444213867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,128,0,1,fp8,fp8,0,0.19150932629903158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,0,0.10737066467603047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,0,0.1072106659412384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,128,0,1,fp8,fp8,0,0.1034346620241801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,128,0,1,float16,float16,0,0.10611200332641602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,128,0,1,fp8,fp8,0,0.10121599833170573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,128,0,1,float16,float16,0,0.10602133472760518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,128,0,1,float16,fp8,0,0.1088106632232666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,128,0,1,float16,fp8,0,0.10724266370137532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,128,0,1,fp8,fp8,0,0.10156266887982686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,128,0,1,float16,float16,0,0.1066986620426178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,128,0,1,float16,fp8,0,0.10839466253916423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,0,0.059989333152770996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,128,0,1,fp8,fp8,0,0.10315733154614766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,0,0.06203199923038483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,128,0,1,float16,float16,0,0.06028266747792562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,128,0,1,fp8,fp8,0,0.059024001161257424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,128,0,1,float16,fp8,0,0.06205866734186808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,128,0,1,fp8,fp8,0,0.059024001161257424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,128,0,1,float16,float16,0,0.062319998939832054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,128,0,1,float16,fp8,0,0.06132266422112783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,128,0,1,fp8,fp8,0,0.059605335195859276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,128,0,1,float16,float16,0,0.06043733159701029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,128,0,1,float16,fp8,0,0.06057600180308024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,128,0,1,fp8,fp8,0,0.05861333509286245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,0,0.037674665451049805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,0,0.03804266701141993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,128,0,1,fp8,fp8,0,0.03766400118668874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,128,0,1,float16,float16,0,0.03772799919048945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,128,0,1,float16,fp8,0,0.03979733337958654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,128,0,1,fp8,fp8,0,0.037605332831541695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,128,0,1,float16,float16,0,0.03781333317359289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,128,0,1,float16,fp8,0,0.039605334401130676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,128,0,1,fp8,fp8,0,0.037776000797748566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,128,0,1,float16,float16,0,0.03764266769091288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,128,0,1,float16,fp8,0,0.03777066618204117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,0,0.027823999524116516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,128,0,1,fp8,fp8,0,0.03748800108830134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,0,0.02769600103298823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,128,0,1,float16,float16,0,0.027632000545660656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,128,0,1,fp8,fp8,0,0.02737066646416982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,128,0,1,fp8,fp8,0,0.02731200059254964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,128,0,1,float16,fp8,0,0.02826666583617528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,128,0,1,float16,float16,0,0.02752000093460083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,128,0,1,float16,fp8,0,0.027509334186712902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,128,0,1,float16,float16,0,0.02716800073782603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,128,0,1,fp8,fp8,0,0.02758399893840154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,128,0,1,float16,fp8,0,0.028234665592511494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,128,0,1,fp8,fp8,0,0.02794666588306427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,0,0.021530665457248688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,0,0.023232000569502514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,128,0,1,fp8,fp8,0,0.02183466653029124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,128,0,1,float16,float16,0,0.023567999402681988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,128,0,1,float16,fp8,0,0.023034666975339253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,128,0,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,128,0,1,float16,float16,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,128,0,1,float16,fp8,0,0.023183998962243397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,128,0,1,fp8,fp8,0,0.021957332889238994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,128,0,1,float16,float16,0,0.021583999196688335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,128,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,128,0,1,fp8,fp8,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,0,0.019493332753578823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,128,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,128,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,128,0,1,float16,fp8,0,0.019839999576409657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,128,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,128,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,128,0,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,128,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,128,0,1,float16,float16,0,0.019541333119074505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,128,0,1,float16,fp8,0,0.01939733326435089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,128,0,1,fp8,fp8,0,0.019434666881958645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,128,0,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,128,0,1,float16,float16,0,0.017658667018016178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,128,0,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,128,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,128,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,128,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,128,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,128,0,1,fp8,fp8,0,0.016421332955360413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,128,0,1,fp8,fp8,0,0.01754133279124896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,128,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,128,0,1,float16,fp8,0,0.015781333049138386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,128,0,1,float16,float16,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,128,0,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,128,0,1,fp8,fp8,0,13.064165751139322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,128,0,1,float16,fp8,0,19.628111521402996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,128,0,1,float16,float16,0,20.08690643310547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,128,0,1,float16,float16,0,20.165877024332683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,128,0,1,fp8,fp8,0,12.663077036539713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,128,0,1,float16,fp8,0,22.596773783365887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,128,0,1,float16,fp8,0,21.693097432454426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,128,0,1,float16,float16,0,22.191253662109375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,128,0,1,fp8,fp8,0,12.645370483398438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,128,0,1,float16,float16,0,10.5872802734375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,128,0,1,fp8,fp8,0,14.268458048502604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,128,0,1,float16,float16,0,22.9344965616862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,128,0,1,float16,fp8,0,21.22256596883138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,128,0,1,fp8,fp8,0,7.486522674560547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,128,0,1,float16,fp8,0,10.327909469604492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,128,0,1,float16,float16,0,10.987813313802084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,128,0,1,fp8,fp8,0,7.130890528361003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,128,0,1,float16,fp8,0,10.676469167073568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,128,0,1,float16,float16,0,11.537344614664713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,128,0,1,fp8,fp8,0,6.029008229573567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,128,0,1,float16,fp8,0,11.126906077067057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,128,0,1,float16,float16,0,10.944608052571615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,128,0,1,fp8,fp8,0,6.076837539672852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,128,0,1,float16,fp8,0,11.604591369628906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,128,0,1,fp8,fp8,0,6.083786646525065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,128,0,1,float16,float16,0,11.174954732259115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,128,0,1,float16,float16,0,6.156917572021484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,128,0,1,float16,fp8,0,11.087467193603516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,128,0,1,float16,fp8,0,5.595984141031901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,128,0,1,fp8,fp8,0,3.4213333129882812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,128,0,1,fp8,fp8,0,2.9442294438680015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,128,0,1,float16,float16,0,5.786901473999023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,128,0,1,float16,fp8,0,5.577834447224935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,128,0,1,float16,float16,0,5.579978942871094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,128,0,1,float16,fp8,0,4.8630720774332685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,128,0,1,fp8,fp8,0,3.790698687235514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,128,0,1,float16,float16,0,5.811909357706706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,128,0,1,float16,fp8,0,5.714186350504558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,128,0,1,fp8,fp8,0,2.973557472229004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,128,0,1,float16,float16,0,5.564309438069661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,128,0,1,float16,float16,0,2.3551626205444336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,128,0,1,fp8,fp8,0,3.3979625701904297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,128,0,1,float16,fp8,0,5.383013407389323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,128,0,1,float16,fp8,0,2.6887734731038413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,128,0,1,fp8,fp8,0,1.6599094072977703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,128,0,1,float16,float16,0,2.0844799677530923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,128,0,1,float16,fp8,0,2.3939359982808432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,128,0,1,fp8,fp8,0,1.6105173428853352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,128,0,1,float16,float16,0,2.0750560760498047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,128,0,1,float16,fp8,0,2.857269287109375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,128,0,1,fp8,fp8,0,1.6114346186319988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,128,0,1,float16,float16,0,2.6656853357950845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,128,0,1,float16,fp8,0,1.8998613357543945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,128,0,1,fp8,fp8,0,1.619765281677246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,128,0,1,float16,float16,0,2.132469336191813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,128,0,1,fp8,fp8,0,1.6229333877563477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,128,0,1,float16,fp8,0,2.7551307678222656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,128,0,1,fp8,fp8,0,7.028106689453125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,128,0,1,float16,fp8,0,12.44863510131836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,128,0,1,float16,float16,0,12.996106465657553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,128,0,1,float16,float16,0,13.32373301188151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,128,0,1,fp8,fp8,0,7.815423965454102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,128,0,1,float16,fp8,0,12.174933115641275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,128,0,1,float16,float16,0,13.66866683959961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,128,0,1,float16,fp8,0,13.2871462504069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,128,0,1,fp8,fp8,0,7.823477427164714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,128,0,1,float16,float16,0,12.006292978922525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,128,0,1,float16,float16,0,6.590490976969401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,128,0,1,fp8,fp8,0,8.500895818074545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,128,0,1,float16,fp8,0,12.92832056681315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,128,0,1,float16,fp8,0,6.272677103678386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,128,0,1,fp8,fp8,0,3.8529173533121743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,128,0,1,float16,float16,0,6.375866572062175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,128,0,1,fp8,fp8,0,3.40556271870931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,128,0,1,float16,fp8,0,6.1792958577473955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,128,0,1,float16,float16,0,6.370842615763347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,128,0,1,fp8,fp8,0,3.5315732955932617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,128,0,1,float16,fp8,0,6.669365564982097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,128,0,1,fp8,fp8,0,3.5925706227620444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,128,0,1,float16,fp8,0,6.3160959879557295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,128,0,1,float16,float16,0,6.610714594523112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,128,0,1,float16,float16,0,3.4874134063720703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,128,0,1,fp8,fp8,0,3.597344080607096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,128,0,1,float16,float16,0,6.397674560546875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,128,0,1,float16,fp8,0,6.096325556437175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,128,0,1,fp8,fp8,0,1.8673760096232097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,128,0,1,float16,fp8,0,3.4401652018229165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,128,0,1,float16,float16,0,3.2093226114908853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,128,0,1,float16,fp8,0,2.112160046895345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,128,0,1,fp8,fp8,0,1.7949013710021973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,128,0,1,float16,float16,0,3.1085920333862305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,128,0,1,fp8,fp8,0,1.7973492940266926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,128,0,1,float16,fp8,0,3.0967092514038086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,128,0,1,float16,float16,0,2.555797259012858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,128,0,1,fp8,fp8,0,1.8096106847127278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,128,0,1,float16,fp8,0,3.3024266560872397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,128,0,1,float16,fp8,0,2.846831957499186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,128,0,1,float16,float16,0,3.3740854263305664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,128,0,1,fp8,fp8,0,1.8194133440653484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,128,0,1,float16,float16,0,1.4901493390401204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,128,0,1,float16,fp8,0,1.349071979522705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,128,0,1,float16,float16,0,1.1747252941131592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,128,0,1,fp8,fp8,0,1.1012319723765056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,128,0,1,fp8,fp8,0,1.0109333197275798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,128,0,1,float16,fp8,0,1.166927973429362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,128,0,1,float16,float16,0,1.2301226456960042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,128,0,1,float16,fp8,0,1.147648016611735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,128,0,1,fp8,fp8,0,1.0115946928660076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,128,0,1,float16,float16,0,1.1950506369272869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,128,0,1,float16,fp8,0,1.1613600254058838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,128,0,1,fp8,fp8,0,1.0130080382029216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,128,0,1,float16,float16,0,1.153882662455241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,128,0,1,float16,fp8,0,1.161962668100993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,128,0,1,fp8,fp8,0,1.0210986932118733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,128,0,1,fp8,fp8,0,5.613162358601888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,128,0,1,float16,float16,0,9.084122975667318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,128,0,1,float16,float16,0,7.939456303914388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,128,0,1,float16,fp8,0,8.721813201904297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,128,0,1,fp8,fp8,0,5.145541191101074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,128,0,1,float16,float16,0,9.022944132486979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,128,0,1,float16,fp8,0,9.099231719970703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,128,0,1,fp8,fp8,0,5.300341288248698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,128,0,1,float16,fp8,0,8.61678949991862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,128,0,1,fp8,fp8,0,5.660218556722005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,128,0,1,float16,float16,0,9.434544245402018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,128,0,1,float16,float16,0,4.585119883219401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,128,0,1,float16,fp8,0,9.472981135050455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,128,0,1,float16,fp8,0,4.337552070617676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,128,0,1,fp8,fp8,0,2.921205202738444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,128,0,1,float16,float16,0,4.261983871459961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,128,0,1,float16,fp8,0,4.834933280944824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,128,0,1,fp8,fp8,0,2.507194677988688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,128,0,1,float16,float16,0,4.3542985916137695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,128,0,1,fp8,fp8,0,2.8539145787556968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,128,0,1,float16,float16,0,3.5899785359700522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,128,0,1,float16,fp8,0,4.436815897623698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,128,0,1,float16,fp8,0,4.552666664123535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,128,0,1,fp8,fp8,0,2.7951625188191733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,128,0,1,float16,float16,0,4.194021224975586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,128,0,1,float16,float16,0,1.5475467046101887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,128,0,1,float16,fp8,0,4.993669191996257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,128,0,1,fp8,fp8,0,2.6124000549316406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,128,0,1,fp8,fp8,0,1.3874185880025227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,128,0,1,float16,fp8,0,1.7474452654520671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,128,0,1,float16,float16,0,2.216218630472819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,128,0,1,float16,fp8,0,1.6042346954345703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,128,0,1,fp8,fp8,0,1.322981357574463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,128,0,1,float16,float16,0,1.9729119936625164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,128,0,1,fp8,fp8,0,1.3232533137003581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,128,0,1,float16,fp8,0,2.0691466331481934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,128,0,1,float16,float16,0,1.9021652539571126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,128,0,1,fp8,fp8,0,1.3286773363749187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,128,0,1,float16,fp8,0,2.0234880447387695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,128,0,1,float16,float16,0,1.673456033070882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,128,0,1,float16,fp8,0,1.7441333134969075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,128,0,1,fp8,fp8,0,1.3366986910502117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,128,0,1,float16,float16,0,1.2055892944335938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,128,0,1,float16,fp8,0,0.9235946337381998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,128,0,1,fp8,fp8,0,0.790175994237264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,128,0,1,float16,float16,0,0.8574079672495524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,128,0,1,float16,fp8,0,0.9688959916432699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,128,0,1,float16,float16,0,0.856719970703125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,128,0,1,fp8,fp8,0,1.0745866298675537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,128,0,1,float16,fp8,0,0.8594826857248942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,128,0,1,fp8,fp8,0,0.7964746952056885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,128,0,1,float16,float16,0,0.857093334197998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,128,0,1,float16,fp8,0,0.8708586692810059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,128,0,1,fp8,fp8,0,0.8368213176727295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,128,0,1,float16,float16,0,0.8807893594106039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,128,0,1,float16,fp8,0,0.8659359614054362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,128,0,1,fp8,fp8,0,0.7762880325317383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,128,0,1,fp8,fp8,0,7.807008107503255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,128,0,1,float16,fp8,0,10.57693354288737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,128,0,1,float16,float16,0,11.519242604573568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,128,0,1,float16,float16,0,11.57473627726237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,128,0,1,fp8,fp8,0,7.025946935017903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,128,0,1,float16,fp8,0,12.028010050455729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,128,0,1,float16,float16,0,12.20361073811849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,128,0,1,float16,fp8,0,13.344900767008463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,128,0,1,fp8,fp8,0,7.687717437744141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,128,0,1,float16,float16,0,6.169066747029622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,128,0,1,fp8,fp8,0,8.01586659749349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,128,0,1,float16,float16,0,12.691195170084635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,128,0,1,float16,fp8,0,11.985599517822266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,128,0,1,float16,fp8,0,6.428378423055013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,128,0,1,fp8,fp8,0,4.124405225118001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,128,0,1,float16,float16,0,6.440234502156575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,128,0,1,float16,fp8,0,6.142704010009766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,128,0,1,fp8,fp8,0,3.3113120396931968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,128,0,1,float16,float16,0,6.170517603556315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,128,0,1,fp8,fp8,0,3.5911680857340493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,128,0,1,float16,fp8,0,6.290463765462239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,128,0,1,float16,float16,0,5.695946375528972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,128,0,1,fp8,fp8,0,3.581786791483561
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,128,0,1,float16,fp8,0,6.6083628336588545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,128,0,1,float16,float16,0,6.2734025319417315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,128,0,1,float16,fp8,0,5.787413279215495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,128,0,1,float16,float16,0,2.9897705713907876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,128,0,1,fp8,fp8,0,3.731541315714518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,128,0,1,fp8,fp8,0,1.8845173517862956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,128,0,1,float16,fp8,0,3.1320320765177407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,128,0,1,float16,fp8,0,2.4579466183980307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,128,0,1,float16,float16,0,2.8610452016194663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,128,0,1,fp8,fp8,0,1.710213343302409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,128,0,1,float16,float16,0,2.953242619832357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,128,0,1,float16,fp8,0,2.5393813451131186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,128,0,1,fp8,fp8,0,1.9774452845255535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,128,0,1,float16,float16,0,1.9447360038757324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,128,0,1,float16,float16,0,1.988906701405843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,128,0,1,fp8,fp8,0,1.761621316274007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,128,0,1,float16,fp8,0,2.472117265065511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,128,0,1,float16,fp8,0,2.52457062403361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,128,0,1,float16,float16,0,1.1338079770406086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,128,0,1,float16,fp8,0,1.6232693990071614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,128,0,1,fp8,fp8,0,1.8968213399251301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,128,0,1,float16,float16,0,1.1165706316630046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,128,0,1,fp8,fp8,0,1.4802613258361816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,128,0,1,float16,fp8,0,1.3353385925292969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,128,0,1,fp8,fp8,0,0.9284693400065104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,128,0,1,float16,float16,0,1.0552106698354085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,128,0,1,fp8,fp8,0,0.9262879689534506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,128,0,1,float16,fp8,0,1.0573866367340088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,128,0,1,float16,float16,0,1.0642613569895427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,128,0,1,float16,fp8,0,1.0606666405995686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,128,0,1,fp8,fp8,0,0.9294133186340332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,128,0,1,float16,float16,0,1.0717493693033855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,128,0,1,fp8,fp8,0,0.9336960315704346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,128,0,1,float16,float16,0,0.6814026832580566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,128,0,1,float16,fp8,0,1.2230026721954346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,128,0,1,float16,fp8,0,0.6308533350626627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,128,0,1,fp8,fp8,0,0.6539466778437296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,128,0,1,float16,float16,0,0.7360479831695557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,128,0,1,float16,fp8,0,0.6021706660588583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,128,0,1,fp8,fp8,0,0.542464017868042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,128,0,1,float16,float16,0,0.6060906648635864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,128,0,1,fp8,fp8,0,0.5468639930089315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,128,0,1,float16,fp8,0,0.6047413349151611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,128,0,1,float16,float16,0,0.6100586652755737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,128,0,1,float16,fp8,0,0.6147199869155884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,128,0,1,fp8,fp8,0,0.5486506621042887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,128,0,1,float16,float16,0,0.6083893378575643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,128,0,1,float16,fp8,0,0.6138773361841837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,128,0,1,fp8,fp8,0,0.5490880012512207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,128,0,1,fp8,fp8,0,4.470010757446289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,128,0,1,float16,float16,0,6.1430238087972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,128,0,1,float16,fp8,0,6.994954427083333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,128,0,1,float16,float16,0,6.878032048543294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,128,0,1,fp8,fp8,0,3.939722696940104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,128,0,1,float16,fp8,0,6.562186559041341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,128,0,1,float16,float16,0,7.068485260009766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,128,0,1,float16,fp8,0,6.95966911315918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,128,0,1,fp8,fp8,0,3.970575968424479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,128,0,1,float16,float16,0,3.319471995035807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,128,0,1,float16,float16,0,7.146736145019531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,128,0,1,fp8,fp8,0,4.487354596455892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,128,0,1,fp8,fp8,0,2.1933652559916177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,128,0,1,float16,fp8,0,7.730176289876302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,128,0,1,float16,fp8,0,3.1207148234049478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,128,0,1,float16,float16,0,2.9691947301228843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,128,0,1,fp8,fp8,0,2.308080037434896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,128,0,1,float16,fp8,0,3.5040372212727866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,128,0,1,float16,float16,0,3.2349494298299155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,128,0,1,float16,fp8,0,3.8762025833129883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,128,0,1,fp8,fp8,0,2.3369439442952475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,128,0,1,float16,float16,0,3.450079917907715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,128,0,1,fp8,fp8,0,2.1820212999979653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,128,0,1,float16,fp8,0,3.1948267618815103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,128,0,1,float16,float16,0,3.7017494837443032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,128,0,1,float16,float16,0,1.5318986574808757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,128,0,1,fp8,fp8,0,2.4005120595296225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,128,0,1,float16,fp8,0,3.455162684122721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,128,0,1,fp8,fp8,0,1.6338879267374675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,128,0,1,float16,fp8,0,1.7208800315856934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,128,0,1,float16,float16,0,1.3759199778238933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,128,0,1,float16,fp8,0,1.4234347343444824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,128,0,1,fp8,fp8,0,1.108346700668335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,128,0,1,float16,fp8,0,1.480474630991618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,128,0,1,float16,float16,0,1.3973600069681804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,128,0,1,fp8,fp8,0,1.270517349243164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,128,0,1,float16,float16,0,1.2157653172810872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,128,0,1,fp8,fp8,0,1.1856213410695393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,128,0,1,float16,fp8,0,1.278335968653361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,128,0,1,float16,float16,0,1.2290186882019043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,128,0,1,float16,fp8,0,1.3532320658365886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,128,0,1,float16,float16,0,0.8353493213653564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,128,0,1,fp8,fp8,0,1.0766186714172363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,128,0,1,float16,fp8,0,0.7729120254516602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,128,0,1,fp8,fp8,0,0.6361973285675049
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,128,0,1,float16,float16,0,0.6854399840037028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,128,0,1,float16,fp8,0,0.6724159717559814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,128,0,1,fp8,fp8,0,0.5944106578826904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,128,0,1,float16,float16,0,0.6697920163472494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,128,0,1,float16,fp8,0,0.6832586924235026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,128,0,1,fp8,fp8,0,0.6279679934183756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,128,0,1,float16,float16,0,0.676362673441569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,128,0,1,float16,fp8,0,0.6773386796315511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,128,0,1,fp8,fp8,0,0.6016373236974081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,128,0,1,float16,float16,0,0.6901600360870361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,128,0,1,float16,float16,0,0.4129439989725749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,128,0,1,float16,fp8,0,0.6798986593882242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,128,0,1,fp8,fp8,0,0.604416012763977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,128,0,1,float16,fp8,0,0.41649067401885986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,128,0,1,fp8,fp8,0,0.3800479968388875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,128,0,1,float16,float16,0,0.3918773333231608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,128,0,1,float16,fp8,0,0.39237332344055176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,128,0,1,fp8,fp8,0,0.35659201939900714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,128,0,1,float16,float16,0,0.39391998449961346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,128,0,1,float16,fp8,0,0.39477332433064777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,128,0,1,fp8,fp8,0,0.3613119920094808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,128,0,1,float16,float16,0,0.3930026690165202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,128,0,1,fp8,fp8,0,0.3605333169301351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,128,0,1,float16,fp8,0,0.3978826602300008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,128,0,1,float16,float16,0,0.40059200922648114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,128,0,1,float16,fp8,0,0.3996106783548991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,128,0,1,fp8,fp8,0,0.36134934425354004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,128,0,1,fp8,fp8,0,4.527797381083171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,128,0,1,float16,float16,0,6.547840118408203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,128,0,1,float16,fp8,0,5.937744140625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,128,0,1,fp8,fp8,0,4.053754806518555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,128,0,1,float16,float16,0,6.7751038869222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,128,0,1,float16,fp8,0,6.896165211995442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,128,0,1,float16,float16,0,6.484949111938477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,128,0,1,float16,fp8,0,6.045221328735352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,128,0,1,fp8,fp8,0,4.56715202331543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,128,0,1,float16,float16,0,6.993343989054362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,128,0,1,float16,float16,0,3.4816694259643555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,128,0,1,float16,fp8,0,6.68229866027832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,128,0,1,fp8,fp8,0,4.211973190307617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,128,0,1,float16,fp8,0,4.107114791870117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,128,0,1,fp8,fp8,0,2.242032051086426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,128,0,1,float16,fp8,0,2.486944039662679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,128,0,1,float16,float16,0,2.7524534861246743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,128,0,1,fp8,fp8,0,2.2557973861694336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,128,0,1,float16,float16,0,3.3758185704549155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,128,0,1,fp8,fp8,0,2.0379679997762046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,128,0,1,float16,fp8,0,2.9930667877197266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,128,0,1,float16,float16,0,2.8420000076293945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,128,0,1,fp8,fp8,0,2.045093377431234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,128,0,1,float16,fp8,0,3.5524266560872397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,128,0,1,float16,float16,0,2.9291038513183594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,128,0,1,fp8,fp8,0,2.102224032084147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,128,0,1,float16,float16,0,1.6299947102864583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,128,0,1,float16,fp8,0,1.6011679967244465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,128,0,1,float16,fp8,0,3.586149215698242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,128,0,1,fp8,fp8,0,1.168826659520467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,128,0,1,fp8,fp8,0,1.1133653322855632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,128,0,1,float16,fp8,0,1.4843254089355469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,128,0,1,float16,float16,0,1.504426638285319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,128,0,1,float16,float16,0,1.2697493235270183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,128,0,1,float16,fp8,0,1.2165066401163738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,128,0,1,fp8,fp8,0,1.263269344965617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,128,0,1,float16,float16,0,1.3521973292032878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,128,0,1,float16,fp8,0,1.2312426567077637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,128,0,1,fp8,fp8,0,1.2631733417510986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,128,0,1,float16,float16,0,1.2188693682352703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,128,0,1,fp8,fp8,0,1.0795146624247234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,128,0,1,float16,fp8,0,1.3572160402933757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,128,0,1,float16,float16,0,0.6916639804840088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,128,0,1,float16,fp8,0,0.6993707021077474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,128,0,1,fp8,fp8,0,0.6269813378651937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,128,0,1,float16,float16,0,0.7120640277862549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,128,0,1,float16,fp8,0,0.6480640172958374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,128,0,1,fp8,fp8,0,0.6704533100128174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,128,0,1,float16,float16,0,0.6478879849116007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,128,0,1,fp8,fp8,0,0.5787520011266073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,128,0,1,float16,fp8,0,0.6488053401311239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,128,0,1,float16,float16,0,0.6504053274790446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,128,0,1,float16,fp8,0,0.6538559993108114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,128,0,1,fp8,fp8,0,0.5809813340504965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,128,0,1,float16,float16,0,0.6543519894282023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,128,0,1,float16,fp8,0,0.6612000068028768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,128,0,1,float16,float16,0,0.3911733229955037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,128,0,1,fp8,fp8,0,0.5859466791152954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,128,0,1,float16,fp8,0,0.4190026521682739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,128,0,1,fp8,fp8,0,0.35983999570210773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,128,0,1,float16,float16,0,0.36210131645202637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,128,0,1,fp8,fp8,0,0.33030933141708374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,128,0,1,float16,fp8,0,0.3628213405609131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,128,0,1,float16,float16,0,0.37057065963745117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,128,0,1,float16,fp8,0,0.3705333471298218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,128,0,1,fp8,fp8,0,0.3317280014355977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,128,0,1,float16,float16,0,0.3683306773503621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,128,0,1,float16,fp8,0,0.3717813491821289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,128,0,1,fp8,fp8,0,0.3352320194244385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,128,0,1,float16,float16,0,0.37170668443044025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,128,0,1,float16,fp8,0,0.3741973241170247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,128,0,1,fp8,fp8,0,0.3402773141860962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,128,0,1,float16,float16,0,0.24037333329518637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,128,0,1,float16,fp8,0,0.24027733008066812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,128,0,1,fp8,fp8,0,0.2203893264134725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,128,0,1,float16,float16,0,0.22402133544286093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,128,0,1,float16,fp8,0,0.22670932610829672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,128,0,1,fp8,fp8,0,0.20636266469955444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,128,0,1,float16,float16,0,0.22239466508229574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,128,0,1,fp8,fp8,0,0.20764267444610596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,128,0,1,float16,fp8,0,0.22441067298253378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,128,0,1,float16,float16,0,0.22308266162872314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,128,0,1,float16,fp8,0,0.22219200929005942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,128,0,1,fp8,fp8,0,0.20974934101104736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,128,0,1,float16,float16,0,0.22687466939290366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,128,0,1,float16,fp8,0,0.2263466715812683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,128,0,1,fp8,fp8,0,0.20997333526611328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,128,0,1,fp8,fp8,0,2.57259194056193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,128,0,1,float16,fp8,0,2.8889760971069336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,128,0,1,float16,float16,0,3.3204692204793296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,128,0,1,float16,float16,0,3.7142454783121743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,128,0,1,fp8,fp8,0,2.588912010192871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,128,0,1,float16,fp8,0,4.067104021708171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,128,0,1,float16,fp8,0,3.936453183492025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,128,0,1,float16,float16,0,3.7794345219930015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,128,0,1,fp8,fp8,0,2.60917329788208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,128,0,1,float16,float16,0,3.9319626490275064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,128,0,1,fp8,fp8,0,2.648026625315348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,128,0,1,float16,float16,0,1.7225546836853027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,128,0,1,float16,fp8,0,3.8455947240193686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,128,0,1,float16,fp8,0,1.8437013626098633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,128,0,1,fp8,fp8,0,1.4762825965881348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,128,0,1,float16,float16,0,1.7801920572916667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,128,0,1,fp8,fp8,0,1.307744026184082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,128,0,1,float16,fp8,0,1.5180586179097493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,128,0,1,float16,float16,0,1.604650656382243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,128,0,1,fp8,fp8,0,1.3157493273417156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,128,0,1,float16,fp8,0,1.489664077758789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,128,0,1,float16,float16,0,1.6098933219909668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,128,0,1,fp8,fp8,0,1.3238133589426677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,128,0,1,float16,fp8,0,1.585103988647461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,128,0,1,float16,float16,0,1.5138986905415852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,128,0,1,fp8,fp8,0,1.348031997680664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,128,0,1,float16,fp8,0,1.7365387280782063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,128,0,1,float16,float16,0,0.8575253486633301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,128,0,1,float16,fp8,0,0.8709332942962646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,128,0,1,fp8,fp8,0,0.7725546360015869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,128,0,1,float16,float16,0,0.9998026688893636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,128,0,1,float16,fp8,0,0.7846399943033854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,128,0,1,fp8,fp8,0,0.6934506893157959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,128,0,1,float16,float16,0,0.7843946615854899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,128,0,1,float16,fp8,0,0.8087680339813232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,128,0,1,fp8,fp8,0,0.71561066309611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,128,0,1,float16,float16,0,0.7919946511586508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,128,0,1,float16,fp8,0,0.7898560365041097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,128,0,1,fp8,fp8,0,0.7989813486735026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,128,0,1,float16,float16,0,0.7995146910349528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,128,0,1,float16,fp8,0,0.8096319834391276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,128,0,1,fp8,fp8,0,0.710256020228068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,128,0,1,float16,float16,0,0.49887998898824054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,128,0,1,float16,fp8,0,0.4684799909591675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,128,0,1,fp8,fp8,0,0.42747732003529865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,128,0,1,float16,float16,0,0.42292265097300213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,128,0,1,float16,fp8,0,0.42583998044331867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,128,0,1,fp8,fp8,0,0.3818613290786743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,128,0,1,float16,float16,0,0.430458664894104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,128,0,1,float16,fp8,0,0.4285600185394287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,128,0,1,fp8,fp8,0,0.38278400897979736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,128,0,1,float16,float16,0,0.4299786488215129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,128,0,1,float16,fp8,0,0.4330666859944661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,128,0,1,fp8,fp8,0,0.3853333393732707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,128,0,1,float16,float16,0,0.43505601088205975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,128,0,1,float16,fp8,0,0.43926934401194256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,128,0,1,float16,float16,0,0.26472532749176025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,128,0,1,fp8,fp8,0,0.391482671101888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,128,0,1,float16,fp8,0,0.26843732595443726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,128,0,1,fp8,fp8,0,0.24624532461166382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,128,0,1,float16,float16,0,0.24018667141596475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,128,0,1,fp8,fp8,0,0.223088006178538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,128,0,1,float16,fp8,0,0.2418880065282186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,128,0,1,float16,float16,0,0.24110400676727295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,128,0,1,float16,fp8,0,0.2440213362375895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,128,0,1,fp8,fp8,0,0.22433066368103027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,128,0,1,float16,float16,0,0.24772799015045166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,128,0,1,float16,fp8,0,0.24674133459726968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,128,0,1,fp8,fp8,0,0.22573866446812949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,128,0,1,float16,float16,0,0.24875199794769287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,128,0,1,float16,fp8,0,0.25287999709447223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,128,0,1,fp8,fp8,0,0.23028266429901123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,128,0,1,float16,float16,0,0.16354133685429892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,128,0,1,float16,fp8,0,0.16615999738375345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,128,0,1,fp8,fp8,0,0.15876799821853638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,128,0,1,float16,fp8,0,0.15256533026695251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,128,0,1,float16,float16,0,0.15397333105405173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,128,0,1,fp8,fp8,0,0.14485866824785867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,128,0,1,float16,float16,0,0.1520960032939911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,128,0,1,float16,fp8,0,0.15285332997639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,128,0,1,fp8,fp8,0,0.144405335187912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,128,0,1,float16,float16,0,0.15159466862678528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,128,0,1,float16,fp8,0,0.15433067083358765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,128,0,1,fp8,fp8,0,0.14282666643460593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,128,0,1,float16,float16,0,0.15467733144760132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,128,0,1,float16,fp8,0,0.15467199683189392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,128,0,1,fp8,fp8,0,0.14325333635012308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,128,0,1,fp8,fp8,0,2.8272053400675454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,128,0,1,float16,fp8,0,3.1785386403401694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,128,0,1,float16,float16,0,3.6657705307006836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,128,0,1,float16,float16,0,3.924282709757487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,128,0,1,fp8,fp8,0,2.8534186681111655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,128,0,1,float16,fp8,0,4.0407358805338545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,128,0,1,float16,float16,0,3.7908852895100913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,128,0,1,float16,fp8,0,3.503376007080078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,128,0,1,fp8,fp8,0,2.8753814697265625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,128,0,1,fp8,fp8,0,2.943370819091797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,128,0,1,float16,float16,0,4.101594607035319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,128,0,1,float16,fp8,0,3.966064135233561
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,128,0,1,float16,float16,0,1.9049280484517415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,128,0,1,float16,fp8,0,1.8346239725748699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,128,0,1,fp8,fp8,0,1.6394240061442058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,128,0,1,float16,float16,0,1.6046613057454426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,128,0,1,float16,fp8,0,1.6877652804056804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,128,0,1,float16,float16,0,1.6063413619995117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,128,0,1,fp8,fp8,0,1.6657813390096028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,128,0,1,float16,fp8,0,1.6546613375345867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,128,0,1,fp8,fp8,0,1.4377759297688801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,128,0,1,float16,fp8,0,1.6375199953715007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,128,0,1,float16,float16,0,1.7458240191141765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,128,0,1,fp8,fp8,0,1.4445759455362956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,128,0,1,float16,float16,0,1.6413280169169109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,128,0,1,float16,float16,0,0.9220320383707682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,128,0,1,float16,fp8,0,0.9390347003936768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,128,0,1,fp8,fp8,0,1.4778505961100261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,128,0,1,float16,fp8,0,1.8965120315551758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,128,0,1,fp8,fp8,0,0.8506133556365967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,128,0,1,float16,float16,0,0.826154629389445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,128,0,1,float16,fp8,0,0.8335413138071696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,128,0,1,fp8,fp8,0,0.7405493259429932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,128,0,1,float16,float16,0,0.8316960334777832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,128,0,1,float16,fp8,0,0.8437066872914633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,128,0,1,fp8,fp8,0,0.7439573605855306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,128,0,1,float16,float16,0,1.022869348526001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,128,0,1,float16,fp8,0,0.8469866911570231
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,128,0,1,fp8,fp8,0,0.7510666847229004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,128,0,1,float16,float16,0,0.8508373101552328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,128,0,1,fp8,fp8,0,0.7651893297831217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,128,0,1,float16,fp8,0,0.9617760181427002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,128,0,1,float16,float16,0,0.4938720067342122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,128,0,1,float16,fp8,0,0.4996906518936157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,128,0,1,fp8,fp8,0,0.45261867841084796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,128,0,1,float16,float16,0,0.4381440083185832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,128,0,1,fp8,fp8,0,0.3976586659749349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,128,0,1,float16,fp8,0,0.4423733154932658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,128,0,1,float16,float16,0,0.44233067830403644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,128,0,1,fp8,fp8,0,0.3999626636505127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,128,0,1,float16,fp8,0,0.4474986791610718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,128,0,1,float16,float16,0,0.44673601786295575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,128,0,1,float16,fp8,0,0.45158398151397705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,128,0,1,fp8,fp8,0,0.4048373301823934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,128,0,1,float16,fp8,0,0.45732800165812176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,128,0,1,float16,float16,0,0.4527999957402547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,128,0,1,float16,float16,0,0.2690133253733317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,128,0,1,fp8,fp8,0,0.4098293383916219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,128,0,1,float16,fp8,0,0.27674667040507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,128,0,1,float16,float16,0,0.24288533131281534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,128,0,1,fp8,fp8,0,0.25222933292388916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,128,0,1,float16,fp8,0,0.24077334006627402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,128,0,1,fp8,fp8,0,0.22337599595387778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,128,0,1,float16,float16,0,0.24347732464472452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,128,0,1,float16,fp8,0,0.24475733439127603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,128,0,1,fp8,fp8,0,0.2259999910990397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,128,0,1,float16,float16,0,0.2450933257738749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,128,0,1,fp8,fp8,0,0.2283573349316915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,128,0,1,float16,fp8,0,0.24613332748413086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,128,0,1,float16,float16,0,0.2512960036595662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,128,0,1,float16,fp8,0,0.2516000072161357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,128,0,1,float16,float16,0,0.15903466939926147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,128,0,1,fp8,fp8,0,0.23262399435043335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,128,0,1,float16,fp8,0,0.16335999965667725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,128,0,1,fp8,fp8,0,0.15176000197728476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,128,0,1,float16,float16,0,0.14054399728775024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,128,0,1,float16,fp8,0,0.14055466651916504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,128,0,1,fp8,fp8,0,0.13154133160909018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,128,0,1,float16,float16,0,0.1414400041103363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,128,0,1,float16,fp8,0,0.1421440045038859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,128,0,1,fp8,fp8,0,0.13057600458463034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,128,0,1,float16,float16,0,0.1446346640586853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,128,0,1,float16,fp8,0,0.14244266351064047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,128,0,1,fp8,fp8,0,0.13226667046546936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,128,0,1,float16,float16,0,0.14422399799029031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,128,0,1,float16,fp8,0,0.14945066968599954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,128,0,1,fp8,fp8,0,0.13863999644915262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,128,0,1,float16,float16,0,0.10083733002344768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,128,0,1,float16,fp8,0,0.10177600383758545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,128,0,1,fp8,fp8,0,0.0978666643301646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,128,0,1,float16,float16,0,0.09634666641553243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,128,0,1,float16,fp8,0,0.09645866354306538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,128,0,1,fp8,fp8,0,0.09324799974759419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,128,0,1,float16,float16,0,0.09573333462079366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,128,0,1,float16,fp8,0,0.09687999884287517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,128,0,1,fp8,fp8,0,0.09091732899347942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,128,0,1,float16,float16,0,0.09739200274149577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,128,0,1,float16,fp8,0,0.09637866417566936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,128,0,1,fp8,fp8,0,0.09310400485992432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,128,0,1,float16,float16,0,0.09636800487836202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,128,0,1,float16,fp8,0,0.09806399544080098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,128,0,1,fp8,fp8,0,0.09325866897900899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,128,0,1,fp8,fp8,0,1.9091413815816243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,128,0,1,float16,float16,0,2.1788533528645835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,128,0,1,float16,fp8,0,2.1434720357259116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,128,0,1,float16,float16,0,2.166624069213867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,128,0,1,fp8,fp8,0,1.9191999435424805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,128,0,1,float16,fp8,0,2.167973359425863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,128,0,1,float16,float16,0,2.1626879374186196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,128,0,1,float16,fp8,0,2.1945066452026367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,128,0,1,fp8,fp8,0,1.9388532638549805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,128,0,1,float16,float16,0,2.2365333239237466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,128,0,1,float16,fp8,0,2.3061653772989907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,128,0,1,fp8,fp8,0,1.9864373207092285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,128,0,1,float16,float16,0,1.2277226448059082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,128,0,1,fp8,fp8,0,1.1286346912384033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,128,0,1,float16,fp8,0,1.2955040136973064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,128,0,1,float16,float16,0,1.0860586961110432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,128,0,1,float16,fp8,0,1.0865333080291748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,128,0,1,fp8,fp8,0,0.9667946497599283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,128,0,1,float16,float16,0,1.085210641225179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,128,0,1,float16,fp8,0,1.0924159685770671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,128,0,1,fp8,fp8,0,1.102725346883138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,128,0,1,float16,float16,0,1.096453348795573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,128,0,1,fp8,fp8,0,1.0016319751739502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,128,0,1,float16,fp8,0,1.099834680557251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,128,0,1,float16,float16,0,1.1110666592915852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,128,0,1,float16,fp8,0,1.1286880175272624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,128,0,1,fp8,fp8,0,1.0107573668162029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,128,0,1,float16,float16,0,0.6331626574198405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,128,0,1,fp8,fp8,0,0.5892373323440552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,128,0,1,float16,fp8,0,0.6486080090204874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,128,0,1,float16,float16,0,0.589130679766337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,128,0,1,fp8,fp8,0,0.5081280072530111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,128,0,1,float16,fp8,0,0.5695039828618368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,128,0,1,float16,float16,0,0.5905813376108805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,128,0,1,fp8,fp8,0,0.510095993677775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,128,0,1,float16,fp8,0,0.5690826574961344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,128,0,1,float16,float16,0,0.5703413486480713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,128,0,1,fp8,fp8,0,0.5136320193608602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,128,0,1,float16,fp8,0,0.5763786633809408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,128,0,1,float16,float16,0,0.5802719990412394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,128,0,1,float16,fp8,0,0.5872000058492025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,128,0,1,fp8,fp8,0,0.5288373231887817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,128,0,1,float16,float16,0,0.33826132615407306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,128,0,1,fp8,fp8,0,0.31749866406122845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,128,0,1,float16,fp8,0,0.3463679949442546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,128,0,1,float16,float16,0,0.3018186688423157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,128,0,1,fp8,fp8,0,0.2760053277015686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,128,0,1,float16,fp8,0,0.30239999294281006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,128,0,1,float16,float16,0,0.3014666636784871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,128,0,1,float16,fp8,0,0.30404265721638996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,128,0,1,fp8,fp8,0,0.2775946656862895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,128,0,1,float16,float16,0,0.30451732873916626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,128,0,1,fp8,fp8,0,0.27938665946324664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,128,0,1,float16,fp8,0,0.30872533718744916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,128,0,1,fp8,fp8,0,0.28648000955581665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,128,0,1,float16,float16,0,0.3100106716156006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,128,0,1,float16,fp8,0,0.31542400519053143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,128,0,1,float16,float16,0,0.19088532527287802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,128,0,1,float16,fp8,0,0.19347200791041055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,128,0,1,fp8,fp8,0,0.17932265996932983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,128,0,1,float16,float16,0,0.1672746737798055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,128,0,1,float16,fp8,0,0.1636319955190023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,128,0,1,fp8,fp8,0,0.1549013356367747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,128,0,1,float16,float16,0,0.16662933429082236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,128,0,1,float16,fp8,0,0.1699946721394857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,128,0,1,fp8,fp8,0,0.15667200088500977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,128,0,1,float16,float16,0,0.1681493322054545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,128,0,1,float16,fp8,0,0.16899200280507407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,128,0,1,fp8,fp8,0,0.16114667057991028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,128,0,1,float16,float16,0,0.17403733730316162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,128,0,1,float16,fp8,0,0.1740106741587321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,128,0,1,fp8,fp8,0,0.16305599610010782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,128,0,1,float16,float16,0,0.11398399869600932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,128,0,1,float16,fp8,0,0.11634666721026103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,128,0,1,fp8,fp8,0,0.11007466912269592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,128,0,1,float16,float16,0,0.10196266571680705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,128,0,1,float16,fp8,0,0.10318932930628459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,128,0,1,fp8,fp8,0,0.0953653355439504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,128,0,1,float16,float16,0,0.10173867146174113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,128,0,1,float16,fp8,0,0.10335466265678406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,128,0,1,fp8,fp8,0,0.0960106650988261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,128,0,1,float16,float16,0,0.10188266634941101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,128,0,1,float16,fp8,0,0.10259200135866801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,128,0,1,fp8,fp8,0,0.09506133198738098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,128,0,1,float16,float16,0,0.10409599542617798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,128,0,1,float16,fp8,0,0.10409067074457805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,128,0,1,fp8,fp8,0,0.09738666812578838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,128,0,1,float16,float16,0,0.07326399783293407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,128,0,1,float16,fp8,0,0.07459733386834462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,128,0,1,fp8,fp8,0,0.07242666681607564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,128,0,1,float16,float16,0,0.07218133409818013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,128,0,1,float16,fp8,0,0.07173333565394084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,128,0,1,fp8,fp8,0,0.06822933256626129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,128,0,1,float16,float16,0,0.07246933380762736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,128,0,1,float16,fp8,0,0.07243200143178304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,128,0,1,fp8,fp8,0,0.06837333242098491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,128,0,1,float16,float16,0,0.07242666681607564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,128,0,1,float16,fp8,0,0.07172800103823344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,128,0,1,fp8,fp8,0,0.0686773310105006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,128,0,1,float16,float16,0,0.07226133346557617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,128,0,1,float16,fp8,0,0.07419733206431071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,128,0,1,fp8,fp8,0,0.06858666737874348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,128,0,1,float16,fp8,0,2.282698631286621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,128,0,1,float16,float16,0,2.3063252766927085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,128,0,1,fp8,fp8,0,2.1375625928243003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,128,0,1,float16,float16,0,2.405061403910319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,128,0,1,fp8,fp8,0,2.2142507235209146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,128,0,1,float16,fp8,0,2.3058667182922363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,128,0,1,float16,float16,0,2.483781337738037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,128,0,1,float16,fp8,0,2.4504267374674478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,128,0,1,float16,float16,0,2.5117546717325845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,128,0,1,fp8,fp8,0,2.655456066131592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,128,0,1,float16,fp8,0,2.471189339955648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,128,0,1,fp8,fp8,0,2.5935093561808267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,128,0,1,float16,float16,0,1.3379732767740886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,128,0,1,float16,fp8,0,1.3541653951009114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,128,0,1,fp8,fp8,0,1.409978707631429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,128,0,1,float16,float16,0,1.1693600018819172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,128,0,1,fp8,fp8,0,1.0884959697723389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,128,0,1,float16,fp8,0,1.1594293117523193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,128,0,1,float16,float16,0,1.1831093629201253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,128,0,1,float16,fp8,0,1.1768213113149006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,128,0,1,fp8,fp8,0,1.1044639746348064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,128,0,1,float16,float16,0,1.2405386765797932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,128,0,1,float16,fp8,0,1.2234240372975667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,128,0,1,fp8,fp8,0,1.3168853123982747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,128,0,1,float16,float16,0,1.2512213389078777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,128,0,1,float16,fp8,0,1.2256373564402263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,128,0,1,fp8,fp8,0,1.2815039952596028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,128,0,1,float16,float16,0,0.6757226785024008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,128,0,1,float16,fp8,0,0.6706986427307129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,128,0,1,fp8,fp8,0,0.6665653387705485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,128,0,1,float16,float16,0,0.5996373494466146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,128,0,1,float16,fp8,0,0.5976906617482504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,128,0,1,fp8,fp8,0,0.5551093419392904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,128,0,1,float16,float16,0,0.6089706818262736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,128,0,1,float16,fp8,0,0.6023893356323242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,128,0,1,fp8,fp8,0,0.5678346554438273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,128,0,1,float16,float16,0,0.6297973394393921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,128,0,1,float16,fp8,0,0.6254133383433024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,128,0,1,fp8,fp8,0,0.6527200142542521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,128,0,1,float16,float16,0,0.62937064965566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,128,0,1,float16,fp8,0,0.6318133274714152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,128,0,1,fp8,fp8,0,0.6199040015538534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,128,0,1,float16,float16,0,0.3514080047607422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,128,0,1,float16,fp8,0,0.34785600503285724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,128,0,1,fp8,fp8,0,0.3454720179239909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,128,0,1,float16,float16,0,0.31380265951156616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,128,0,1,float16,fp8,0,0.3126986622810364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,128,0,1,fp8,fp8,0,0.2887626687685649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,128,0,1,float16,float16,0,0.316703995068868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,128,0,1,float16,fp8,0,0.31613866488138836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,128,0,1,fp8,fp8,0,0.29467199246088666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,128,0,1,float16,float16,0,0.32684266567230225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,128,0,1,float16,fp8,0,0.3243253429730733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,128,0,1,fp8,fp8,0,0.3204853336016337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,128,0,1,float16,float16,0,0.32891199986139935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,128,0,1,float16,fp8,0,0.3236266573270162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,128,0,1,float16,float16,0,0.18847467501958212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,128,0,1,float16,fp8,0,0.18641066551208496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,128,0,1,fp8,fp8,0,0.32320000727971393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,128,0,1,fp8,fp8,0,0.18406933546066284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,128,0,1,float16,float16,0,0.16562666495641074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,128,0,1,float16,fp8,0,0.16723734140396118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,128,0,1,fp8,fp8,0,0.15465600291887918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,128,0,1,float16,float16,0,0.1680799921353658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,128,0,1,float16,fp8,0,0.17012266318003336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,128,0,1,fp8,fp8,0,0.1583199997742971
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,128,0,1,float16,float16,0,0.17510932683944702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,128,0,1,float16,fp8,0,0.1746506690979004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,128,0,1,fp8,fp8,0,0.17160000403722128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,128,0,1,float16,float16,0,0.17633599042892456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,128,0,1,float16,fp8,0,0.17682133118311563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,128,0,1,fp8,fp8,0,0.16889599959055582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,128,0,1,float16,float16,0,0.10962667067845662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,128,0,1,float16,fp8,0,0.1074026624361674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,128,0,1,fp8,fp8,0,0.10644267002741496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,128,0,1,float16,float16,0,0.09368000427881877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,128,0,1,float16,fp8,0,0.09295466542243958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,128,0,1,fp8,fp8,0,0.087909330924352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,128,0,1,float16,float16,0,0.09492266178131104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,128,0,1,float16,fp8,0,0.0937013328075409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,128,0,1,fp8,fp8,0,0.08711999654769897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,128,0,1,float16,float16,0,0.09758399923642476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,128,0,1,float16,fp8,0,0.0979360044002533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,128,0,1,fp8,fp8,0,0.09249066313107808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,128,0,1,float16,float16,0,0.09703466296195984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,128,0,1,float16,fp8,0,0.09690666198730469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,128,0,1,float16,float16,0,0.060138667623202004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,128,0,1,fp8,fp8,0,0.09787733356157939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,128,0,1,float16,fp8,0,0.06071466704209646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,128,0,1,fp8,fp8,0,0.06039999922116598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,128,0,1,float16,float16,0,0.05804799993832906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,128,0,1,float16,fp8,0,0.057818666100502014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,128,0,1,fp8,fp8,0,0.05397333204746246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,128,0,1,float16,float16,0,0.05719999969005585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,128,0,1,float16,fp8,0,0.05815466741720835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,128,0,1,fp8,fp8,0,0.054645334680875145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,128,0,1,float16,float16,0,0.05955199897289276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,128,0,1,float16,fp8,0,0.059477334221204124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,128,0,1,fp8,fp8,0,0.05799466868241628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,128,0,1,float16,float16,0,0.059994667768478394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,128,0,1,float16,fp8,0,0.059258664647738137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,128,0,1,fp8,fp8,0,0.05780800183614095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,128,0,1,float16,float16,0,0.03949866692225138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,128,0,1,float16,fp8,0,0.03899733225504557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,128,0,1,fp8,fp8,0,0.03836799909671148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,128,0,1,float16,float16,0,0.03737599899371465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,128,0,1,float16,fp8,0,0.037402667105197906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,128,0,1,fp8,fp8,0,0.035674666364987694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,128,0,1,float16,float16,0,0.03748266647259394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,128,0,1,float16,fp8,0,0.03755733370780945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,128,0,1,fp8,fp8,0,0.03584533433119456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,128,0,1,float16,float16,0,0.038005332152048744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,128,0,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,128,0,1,fp8,fp8,0,0.03772266705830892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,128,0,1,float16,float16,0,0.037802666425704956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,128,0,1,float16,fp8,0,0.03770133356253306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,128,0,1,fp8,fp8,0,0.037818667789300285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,128,0,1,fp8,fp8,0,1.8431893984476726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,128,0,1,float16,fp8,0,1.9403893152872722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,128,0,1,float16,float16,0,1.9596907297770183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,128,0,1,float16,float16,0,2.0023999214172363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,128,0,1,float16,fp8,0,1.9581440289815266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,128,0,1,fp8,fp8,0,1.918933391571045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,128,0,1,float16,float16,0,2.1355199813842773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,128,0,1,float16,fp8,0,2.1081013679504395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,128,0,1,fp8,fp8,0,2.3504320780436196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,128,0,1,float16,float16,0,2.141599973042806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,128,0,1,float16,fp8,0,2.123535950978597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,128,0,1,float16,float16,0,1.14955735206604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,128,0,1,float16,fp8,0,1.151093324025472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,128,0,1,fp8,fp8,0,2.3221227327982583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,128,0,1,float16,float16,0,1.0002986590067546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,128,0,1,fp8,fp8,0,1.171072006225586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,128,0,1,float16,fp8,0,0.9852266311645508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,128,0,1,fp8,fp8,0,0.9352906545003256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,128,0,1,float16,float16,0,1.0091466903686523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,128,0,1,float16,fp8,0,0.9972853660583496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,128,0,1,fp8,fp8,0,0.9587893486022949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,128,0,1,float16,float16,0,1.064794699350993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,128,0,1,float16,fp8,0,1.046229362487793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,128,0,1,float16,float16,0,1.0630239645640056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,128,0,1,fp8,fp8,0,1.1714026927947998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,128,0,1,float16,fp8,0,1.0416800181070964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,128,0,1,fp8,fp8,0,1.1690026919047039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,128,0,1,float16,float16,0,0.580293337504069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,128,0,1,float16,fp8,0,0.5825866858164469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,128,0,1,fp8,fp8,0,0.5854186614354452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,128,0,1,float16,float16,0,0.5097920099894205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,128,0,1,fp8,fp8,0,0.4796266555786133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,128,0,1,float16,fp8,0,0.5065066814422607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,128,0,1,float16,float16,0,0.5200959841410319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,128,0,1,float16,fp8,0,0.5152586698532104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,128,0,1,fp8,fp8,0,0.4917866786321004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,128,0,1,float16,float16,0,0.5374079942703247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,128,0,1,float16,fp8,0,0.5314773321151733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,128,0,1,fp8,fp8,0,0.5776960055033366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,128,0,1,float16,float16,0,0.5357760190963745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,128,0,1,float16,fp8,0,0.5322453180948893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,128,0,1,fp8,fp8,0,0.5426880121231079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,128,0,1,float16,float16,0,0.30382933219273883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,128,0,1,float16,fp8,0,0.2978773315747579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,128,0,1,fp8,fp8,0,0.3035893241564433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,128,0,1,float16,float16,0,0.26518932978312176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,128,0,1,fp8,fp8,0,0.24931732813517252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,128,0,1,float16,fp8,0,0.2658613324165344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,128,0,1,float16,float16,0,0.26920000712076825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,128,0,1,float16,fp8,0,0.26848000288009644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,128,0,1,fp8,fp8,0,0.2569119930267334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,128,0,1,float16,float16,0,0.2779146631558736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,128,0,1,float16,fp8,0,0.2757866581281026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,128,0,1,fp8,fp8,0,0.2805973291397095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,128,0,1,float16,float16,0,0.28088533878326416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,128,0,1,float16,fp8,0,0.2770773371060689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,128,0,1,fp8,fp8,0,0.27482666571935016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,128,0,1,float16,float16,0,0.16272000471750894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,128,0,1,float16,fp8,0,0.16217066844304404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,128,0,1,fp8,fp8,0,0.16291733582814535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,128,0,1,float16,float16,0,0.1406880021095276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,128,0,1,float16,fp8,0,0.13873066504796347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,128,0,1,fp8,fp8,0,0.13116266330083212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,128,0,1,float16,float16,0,0.14193066954612732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,128,0,1,float16,fp8,0,0.1416373352209727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,128,0,1,fp8,fp8,0,0.13609066605567932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,128,0,1,float16,float16,0,0.1474240024884542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,128,0,1,float16,fp8,0,0.1476533313592275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,128,0,1,fp8,fp8,0,0.147407998641332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,128,0,1,float16,float16,0,0.15011733770370483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,128,0,1,float16,fp8,0,0.1492746671040853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,128,0,1,fp8,fp8,0,0.14843733112017313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,128,0,1,float16,float16,0,0.09380267063776652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,128,0,1,float16,fp8,0,0.09281599521636963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,128,0,1,fp8,fp8,0,0.09213333328564961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,128,0,1,float16,float16,0,0.07835199932257335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,128,0,1,float16,fp8,0,0.0771679977575938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,128,0,1,fp8,fp8,0,0.07450133562088013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,128,0,1,float16,float16,0,0.07876800000667572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,128,0,1,float16,fp8,0,0.07910933097203572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,128,0,1,fp8,fp8,0,0.07422400017579396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,128,0,1,float16,float16,0,0.08074666559696198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,128,0,1,float16,fp8,0,0.0809440016746521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,128,0,1,fp8,fp8,0,0.07981333136558533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,128,0,1,float16,float16,0,0.08084266881148021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,128,0,1,float16,fp8,0,0.08072533210118611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,128,0,1,float16,float16,0,0.051551997661590576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,128,0,1,fp8,fp8,0,0.08333333333333333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,128,0,1,float16,fp8,0,0.0503359983364741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,128,0,1,fp8,fp8,0,0.054058666030565895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,128,0,1,float16,float16,0,0.04795200129350027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,128,0,1,float16,fp8,0,0.047685335079828896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,128,0,1,fp8,fp8,0,0.04585599899291992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,128,0,1,float16,float16,0,0.04782933493455251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,128,0,1,float16,float16,0,0.04970133304595947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,128,0,1,fp8,fp8,0,0.04584000011285146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,128,0,1,float16,fp8,0,0.04814399778842926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,128,0,1,float16,fp8,0,0.050053333242734276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,128,0,1,fp8,fp8,0,0.050160000721613564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,128,0,1,float16,float16,0,0.04993066688378652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,128,0,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,128,0,1,fp8,fp8,0,0.04975999891757965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,128,0,1,float16,float16,0,0.033386667569478355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,128,0,1,float16,fp8,0,0.03342933456103007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,128,0,1,fp8,fp8,0,0.03370666752258936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,128,0,1,float16,float16,0,0.03153600047032038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,128,0,1,fp8,fp8,0,0.031658666829268135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,128,0,1,float16,fp8,0,0.03164266546567281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,128,0,1,float16,float16,0,0.03148266673088074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,128,0,1,fp8,fp8,0,0.031114667654037476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,128,0,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,128,0,1,float16,float16,0,0.03169599920511246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,128,0,1,float16,fp8,0,0.0335359995563825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,128,0,1,fp8,fp8,0,0.03162133445342382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,128,0,1,float16,float16,0,0.031530665854612984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,128,0,1,float16,fp8,0,0.03346666693687439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,128,0,1,fp8,fp8,0,0.03148799886306127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,128,0,1,float16,float16,0,0.027632000545660656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,128,0,1,float16,fp8,0,0.027349332968393963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,128,0,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,128,0,1,float16,float16,0,0.02737066646416982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,128,0,1,float16,fp8,0,0.02717866748571396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,128,0,1,fp8,fp8,0,0.02589333305756251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,128,0,1,float16,float16,0,0.027269333600997925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,128,0,1,float16,fp8,0,0.026682667434215546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,128,0,1,fp8,fp8,0,0.025386666258176167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,128,0,1,float16,float16,0,0.027664000789324444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,128,0,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,128,0,1,fp8,fp8,0,0.027317332724730175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,128,0,1,float16,float16,0,0.0273333340883255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,128,0,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,128,0,1,fp8,fp8,0,0.02754666656255722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,128,0,1,float16,float16,0,0.903445323308309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,128,0,1,fp8,fp8,0,0.8465173244476318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,128,0,1,float16,fp8,0,0.8932533264160156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,128,0,1,float16,float16,0,0.9171733061472574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,128,0,1,fp8,fp8,0,0.880341370900472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,128,0,1,float16,fp8,0,0.903557300567627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,128,0,1,float16,fp8,0,0.9577493667602539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,128,0,1,float16,float16,0,0.9742773373921713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,128,0,1,fp8,fp8,0,1.0903573036193848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,128,0,1,float16,fp8,0,0.9586613178253174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,128,0,1,float16,float16,0,0.977125326792399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,128,0,1,fp8,fp8,0,1.0765653451283772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,128,0,1,float16,float16,0,0.5350773334503174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,128,0,1,float16,fp8,0,0.5262186527252197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,128,0,1,fp8,fp8,0,0.5416533152262369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,128,0,1,float16,float16,0,0.4614666700363159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,128,0,1,float16,fp8,0,0.45579731464385986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,128,0,1,fp8,fp8,0,0.4350879987080892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,128,0,1,float16,float16,0,0.46767465273539227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,128,0,1,float16,fp8,0,0.4622186819712321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,128,0,1,fp8,fp8,0,0.44752001762390137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,128,0,1,float16,fp8,0,0.4843626817067464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,128,0,1,float16,float16,0,0.4889920155207316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,128,0,1,fp8,fp8,0,0.5351413488388062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,128,0,1,float16,float16,0,0.28065599997838336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,128,0,1,float16,float16,0,0.4901546637217204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,128,0,1,float16,fp8,0,0.4853920141855876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,128,0,1,fp8,fp8,0,0.5338720083236694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,128,0,1,float16,fp8,0,0.2752853234608968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,128,0,1,fp8,fp8,0,0.2826559940973918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,128,0,1,float16,float16,0,0.24035199483235678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,128,0,1,float16,fp8,0,0.24073066314061484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,128,0,1,fp8,fp8,0,0.2262880007425944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,128,0,1,float16,float16,0,0.24480533599853516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,128,0,1,float16,fp8,0,0.24338134129842123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,128,0,1,fp8,fp8,0,0.2334293325742086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,128,0,1,float16,float16,0,0.25428799788157147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,128,0,1,float16,fp8,0,0.25269333521525067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,128,0,1,fp8,fp8,0,0.2586933374404907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,128,0,1,float16,float16,0,0.2540586590766907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,128,0,1,float16,fp8,0,0.25362666447957355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,128,0,1,float16,float16,0,0.15057599544525146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,128,0,1,fp8,fp8,0,0.25380265712738037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,128,0,1,float16,fp8,0,0.14858667055765787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,128,0,1,fp8,fp8,0,0.15037866433461508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,128,0,1,float16,float16,0,0.12600533167521158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,128,0,1,float16,fp8,0,0.1239466667175293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,128,0,1,fp8,fp8,0,0.11893866459528606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,128,0,1,float16,float16,0,0.12779200077056885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,128,0,1,float16,fp8,0,0.1262986660003662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,128,0,1,fp8,fp8,0,0.12387200196584065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,128,0,1,float16,float16,0,0.13378666838010153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,128,0,1,float16,fp8,0,0.1339306632677714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,128,0,1,fp8,fp8,0,0.13503467043240866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,128,0,1,float16,float16,0,0.1346986691157023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,128,0,1,float16,fp8,0,0.13555199901262918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,128,0,1,fp8,fp8,0,0.1372213363647461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,128,0,1,float16,float16,0,0.08715200424194336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,128,0,1,float16,fp8,0,0.08687999844551086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,128,0,1,float16,float16,0,0.07061333457628886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,128,0,1,fp8,fp8,0,0.08585600058237712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,128,0,1,float16,fp8,0,0.07044266661008199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,128,0,1,fp8,fp8,0,0.06807466844717662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,128,0,1,float16,float16,0,0.07270933190981548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,128,0,1,float16,fp8,0,0.07236266632874806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,128,0,1,fp8,fp8,0,0.06700799862543742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,128,0,1,float16,float16,0,0.07458666463692983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,128,0,1,float16,fp8,0,0.07402133444945018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,128,0,1,fp8,fp8,0,0.07524266839027405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,128,0,1,float16,float16,0,0.07455466687679291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,128,0,1,float16,fp8,0,0.07452266911665599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,128,0,1,float16,float16,0,0.04574933151404063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,128,0,1,float16,fp8,0,0.04409066836039225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,128,0,1,fp8,fp8,0,0.07499733567237854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,128,0,1,fp8,fp8,0,0.04774933556715647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,128,0,1,float16,float16,0,0.04182399809360504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,128,0,1,float16,fp8,0,0.04165333261092504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,128,0,1,fp8,fp8,0,0.04155733436346054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,128,0,1,float16,float16,0,0.04167466859022776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,128,0,1,float16,fp8,0,0.04236799975236257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,128,0,1,fp8,fp8,0,0.04155733436346054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,128,0,1,float16,float16,0,0.0435146689414978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,128,0,1,float16,fp8,0,0.04374399781227112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,128,0,1,fp8,fp8,0,0.04377600053946177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,128,0,1,float16,float16,0,0.043765331308046974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,128,0,1,float16,fp8,0,0.045381332437197365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,128,0,1,fp8,fp8,0,0.04379733403523763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,128,0,1,float16,float16,0,0.03124266614516576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,128,0,1,float16,fp8,0,0.02961066613594691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,128,0,1,fp8,fp8,0,0.029338667790095013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,128,0,1,float16,float16,0,0.027850667635599773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,128,0,1,float16,fp8,0,0.028016000986099243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,128,0,1,fp8,fp8,0,0.027530667682488758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,128,0,1,float16,float16,0,0.029125332832336426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,128,0,1,float16,fp8,0,0.02941333254178365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,128,0,1,fp8,fp8,0,0.027434666951497395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,128,0,1,float16,float16,0,0.02956266701221466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,128,0,1,float16,fp8,0,0.02975466599067052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,128,0,1,fp8,fp8,0,0.02956266701221466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,128,0,1,float16,float16,0,0.029743999242782593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,128,0,1,float16,fp8,0,0.029685333371162415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,128,0,1,fp8,fp8,0,0.031018666923046112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,128,0,1,float16,float16,0,0.025216000775496166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,128,0,1,float16,fp8,0,0.025434667865435284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,128,0,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,128,0,1,float16,float16,0,0.024117333193620045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,128,0,1,float16,fp8,0,0.023765332996845245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,128,0,1,fp8,fp8,0,0.023071999351183575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,128,0,1,float16,float16,0,0.02515733242034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,128,0,1,float16,fp8,0,0.023306667804718018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,128,0,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,128,0,1,float16,float16,0,0.025125332176685333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,128,0,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,128,0,1,fp8,fp8,0,0.023402666052182514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,128,0,1,float16,float16,0,0.023557332654794056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,128,0,1,float16,fp8,0,0.025018667181332905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,128,0,1,fp8,fp8,0,0.024864000578721363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,128,0,1,float16,float16,0,0.02145066608985265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,128,0,1,float16,fp8,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,128,0,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,128,0,1,float16,float16,0,0.021253332495689392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,128,0,1,float16,fp8,0,0.02160000056028366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,128,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,128,0,1,float16,float16,0,0.02142400046189626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,128,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,128,0,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,128,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,128,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,128,0,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,128,0,1,float16,float16,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,128,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,128,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,128,0,1,float16,float16,0,0.4891466697057088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,128,0,1,fp8,fp8,0,0.4686400095621745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,128,0,1,float16,fp8,0,0.48152534166971844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,128,0,1,float16,float16,0,0.5037333170572916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,128,0,1,float16,fp8,0,0.4993120034535726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,128,0,1,fp8,fp8,0,0.4811360041300456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,128,0,1,float16,float16,0,0.5254666805267334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,128,0,1,float16,fp8,0,0.5211146672566732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,128,0,1,fp8,fp8,0,0.5632053216298422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,128,0,1,float16,float16,0,0.526688019434611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,128,0,1,float16,fp8,0,0.5201599995295206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,128,0,1,fp8,fp8,0,0.5581333239873251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,128,0,1,float16,float16,0,0.2921813329060872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,128,0,1,fp8,fp8,0,0.29571733872095746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,128,0,1,float16,float16,0,0.2523146669069926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,128,0,1,float16,fp8,0,0.288917342821757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,128,0,1,float16,fp8,0,0.25058666865030926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,128,0,1,fp8,fp8,0,0.24054400126139322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,128,0,1,float16,float16,0,0.2606559991836548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,128,0,1,float16,fp8,0,0.2566879987716675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,128,0,1,fp8,fp8,0,0.24910932779312134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,128,0,1,float16,float16,0,0.269813338915507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,128,0,1,float16,fp8,0,0.2686026692390442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,128,0,1,fp8,fp8,0,0.2707680066426595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,128,0,1,float16,float16,0,0.2714453339576721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,128,0,1,float16,fp8,0,0.26764265696207684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,128,0,1,float16,float16,0,0.15572800238927206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,128,0,1,fp8,fp8,0,0.27164266506830853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,128,0,1,float16,fp8,0,0.1536960005760193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,128,0,1,fp8,fp8,0,0.15722666184107462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,128,0,1,float16,float16,0,0.1346879998842875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,128,0,1,float16,fp8,0,0.13291733463605246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,128,0,1,fp8,fp8,0,0.12753066420555115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,128,0,1,float16,float16,0,0.13808000087738037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,128,0,1,float16,fp8,0,0.13638933499654135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,128,0,1,fp8,fp8,0,0.13291200002034506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,128,0,1,float16,float16,0,0.14436800281206766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,128,0,1,float16,fp8,0,0.14189866185188293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,128,0,1,fp8,fp8,0,0.14258666833241782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,128,0,1,float16,float16,0,0.14505599935849509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,128,0,1,float16,fp8,0,0.14361066619555155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,128,0,1,fp8,fp8,0,0.14056533575057983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,128,0,1,float16,float16,0,0.08716266353925069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,128,0,1,float16,fp8,0,0.08700799942016602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,128,0,1,fp8,fp8,0,0.08800533413887024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,128,0,1,float16,float16,0,0.0724533349275589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,128,0,1,float16,fp8,0,0.0708000014225642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,128,0,1,fp8,fp8,0,0.07060799996058147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,128,0,1,float16,float16,0,0.07249066730340321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,128,0,1,float16,fp8,0,0.07257600128650665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,128,0,1,fp8,fp8,0,0.0701386680205663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,128,0,1,float16,float16,0,0.07677866518497467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,128,0,1,float16,fp8,0,0.07650133470694225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,128,0,1,fp8,fp8,0,0.07671999931335449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,128,0,1,float16,float16,0,0.07818133135636647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,128,0,1,float16,fp8,0,0.07793599863847096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,128,0,1,fp8,fp8,0,0.07667199770609538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,128,0,1,float16,float16,0,0.04911999901135763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,128,0,1,float16,fp8,0,0.04713066418965658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,128,0,1,fp8,fp8,0,0.049973333875338234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,128,0,1,float16,float16,0,0.04560000201066335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,128,0,1,float16,fp8,0,0.043824002146720886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,128,0,1,fp8,fp8,0,0.04162666698296865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,128,0,1,float16,float16,0,0.04594666759173075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,128,0,1,float16,fp8,0,0.04366933306058248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,128,0,1,fp8,fp8,0,0.04178666571776072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,128,0,1,float16,float16,0,0.04701333244641622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,128,0,1,float16,fp8,0,0.04565866788228353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,128,0,1,fp8,fp8,0,0.04572799801826477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,128,0,1,float16,float16,0,0.045882667104403176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,128,0,1,float16,fp8,0,0.04597333570321401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,128,0,1,fp8,fp8,0,0.04587199787298838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,128,0,1,float16,float16,0,0.031471999982992806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,128,0,1,float16,fp8,0,0.031386665999889374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,128,0,1,fp8,fp8,0,0.03145066648721695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,128,0,1,float16,float16,0,0.029706666866938274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,128,0,1,float16,fp8,0,0.02940266579389572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,128,0,1,fp8,fp8,0,0.02942399928967158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,128,0,1,float16,float16,0,0.02974933385848999
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,128,0,1,float16,fp8,0,0.029365333418051403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,128,0,1,fp8,fp8,0,0.029370665550231934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,128,0,1,float16,float16,0,0.02977599948644638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,128,0,1,float16,fp8,0,0.031498665610949196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,128,0,1,fp8,fp8,0,0.02938666691382726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,128,0,1,float16,float16,0,0.03102933367093404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,128,0,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,128,0,1,fp8,fp8,0,0.03141866624355316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,128,0,1,float16,float16,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,128,0,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,128,0,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,128,0,1,float16,float16,0,0.021189334491888683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,128,0,1,float16,fp8,0,0.021359999974568684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,128,0,1,fp8,fp8,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,128,0,1,float16,float16,0,0.02141333371400833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,128,0,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,128,0,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,128,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,128,0,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,128,0,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,128,0,1,float16,fp8,0,0.0216799999276797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,128,0,1,float16,float16,0,0.021482666333516438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,128,0,1,fp8,fp8,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,128,0,1,float16,float16,0,0.01930133377512296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,128,0,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,128,0,1,fp8,fp8,0,0.019546666493018467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,128,0,1,float16,float16,0,0.01756799966096878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,128,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,128,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,128,0,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,128,0,1,float16,float16,0,0.017680000513792038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,128,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,128,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,128,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,128,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,128,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,128,0,1,float16,float16,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,128,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,128,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,128,0,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,128,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,128,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,128,0,1,float16,fp8,0,0.018346666047970455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,128,0,1,float16,float16,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,128,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,128,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,128,0,1,float16,float16,0,0.018837332725524902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,128,0,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,128,0,1,float16,fp8,0,0.3149919907251994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,128,0,1,fp8,fp8,0,0.3079093297322591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,128,0,1,float16,float16,0,0.3190773328145345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,128,0,1,fp8,fp8,0,0.30856533845265705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,128,0,1,float16,fp8,0,0.3197173277537028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,128,0,1,float16,float16,0,0.32309865951538086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,128,0,1,float16,float16,0,0.3390880028406779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,128,0,1,float16,fp8,0,0.3338293234507243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,128,0,1,fp8,fp8,0,0.33470932642618817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,128,0,1,float16,float16,0,0.33826665083567303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,128,0,1,float16,fp8,0,0.3317386706670125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,128,0,1,float16,float16,0,0.18844266732533774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,128,0,1,fp8,fp8,0,0.3412586847941081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,128,0,1,float16,fp8,0,0.18449600537618002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,128,0,1,fp8,fp8,0,0.18820800383885702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,128,0,1,float16,float16,0,0.16685867309570312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,128,0,1,float16,fp8,0,0.16545599699020386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,128,0,1,fp8,fp8,0,0.16037333011627197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,128,0,1,float16,float16,0,0.16857600212097168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,128,0,1,float16,fp8,0,0.1678239901860555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,128,0,1,fp8,fp8,0,0.16105066736539206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,128,0,1,float16,float16,0,0.17590399583180746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,128,0,1,float16,fp8,0,0.1758613387743632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,128,0,1,fp8,fp8,0,0.17384533087412515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,128,0,1,float16,float16,0,0.17721066872278848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,128,0,1,float16,fp8,0,0.17502933740615845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,128,0,1,fp8,fp8,0,0.17605332533518472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,128,0,1,float16,float16,0,0.10352533062299092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,128,0,1,float16,fp8,0,0.10201066732406616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,128,0,1,fp8,fp8,0,0.10433600346247356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,128,0,1,float16,float16,0,0.09100799759229024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,128,0,1,float16,fp8,0,0.09067199627558391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,128,0,1,fp8,fp8,0,0.0869706670443217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,128,0,1,float16,float16,0,0.09106133381525676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,128,0,1,float16,fp8,0,0.0897920032342275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,128,0,1,fp8,fp8,0,0.08689600229263306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,128,0,1,float16,float16,0,0.09402666489283244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,128,0,1,float16,fp8,0,0.09364266196886699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,128,0,1,fp8,fp8,0,0.09303999940554301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,128,0,1,float16,float16,0,0.09537067015965779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,128,0,1,float16,fp8,0,0.09531199932098389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,128,0,1,float16,float16,0,0.05551999807357788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,128,0,1,fp8,fp8,0,0.09533333778381348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,128,0,1,float16,fp8,0,0.054229333996772766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,128,0,1,fp8,fp8,0,0.05765333275000254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,128,0,1,float16,float16,0,0.05189333359400431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,128,0,1,float16,fp8,0,0.0517439991235733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,128,0,1,fp8,fp8,0,0.04995200037956238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,128,0,1,float16,float16,0,0.05212800204753876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,128,0,1,float16,fp8,0,0.05204799771308899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,128,0,1,fp8,fp8,0,0.04982399940490723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,128,0,1,float16,float16,0,0.054048001766204834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,128,0,1,float16,fp8,0,0.05409066875775655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,128,0,1,fp8,fp8,0,0.05195199946562449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,128,0,1,float16,float16,0,0.05406933526198069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,128,0,1,float16,fp8,0,0.05394133428732554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,128,0,1,float16,float16,0,0.037589333951473236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,128,0,1,fp8,fp8,0,0.05388266841570536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,128,0,1,float16,fp8,0,0.03760000069936117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,128,0,1,fp8,fp8,0,0.03555200000603994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,128,0,1,float16,float16,0,0.035674666364987694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,128,0,1,float16,fp8,0,0.0351200004418691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,128,0,1,fp8,fp8,0,0.033488000432650246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,128,0,1,float16,float16,0,0.03544000039498011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,128,0,1,float16,fp8,0,0.035189333061377205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,128,0,1,float16,float16,0,0.03575466573238373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,128,0,1,fp8,fp8,0,0.03510933369398117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,128,0,1,float16,fp8,0,0.036687999963760376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,128,0,1,fp8,fp8,0,0.03550933301448822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,128,0,1,float16,float16,0,0.035546667873859406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,128,0,1,float16,fp8,0,0.03731200098991394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,128,0,1,float16,float16,0,0.023893333971500397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,128,0,1,fp8,fp8,0,0.03570666660865148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,128,0,1,float16,fp8,0,0.02510400116443634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,128,0,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,128,0,1,float16,float16,0,0.023200000325838726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,128,0,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,128,0,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,128,0,1,float16,float16,0,0.023242667317390442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,128,0,1,float16,fp8,0,0.02348800003528595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,128,0,1,fp8,fp8,0,0.023498666783173878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,128,0,1,float16,float16,0,0.023232000569502514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,128,0,1,float16,fp8,0,0.023621333142121632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,128,0,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,128,0,1,float16,float16,0,0.025290665527184803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,128,0,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,128,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,128,0,1,fp8,fp8,0,0.025237334271272022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,128,0,1,float16,fp8,0,0.019434666881958645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,128,0,1,fp8,fp8,0,0.019738666713237762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,128,0,1,float16,float16,0,0.019013332823912304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,128,0,1,float16,fp8,0,0.020517333100239437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,128,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,128,0,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,128,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,128,0,1,float16,float16,0,0.017759999881188076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,128,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,128,0,1,float16,float16,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,128,0,1,float16,fp8,0,0.018112000077962875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,128,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,128,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,128,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,128,0,1,float16,fp8,0,0.017984000345071156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,128,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,128,0,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,128,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,128,0,1,fp8,fp8,0,0.017525333911180496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,128,0,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,128,0,1,float16,float16,0,0.01658133293191592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,128,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,128,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,128,0,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,128,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,128,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,128,0,1,float16,float16,0,0.23848533630371094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,128,0,1,float16,fp8,0,0.23745600382486978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,128,0,1,fp8,fp8,0,0.2282080054283142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,128,0,1,float16,float16,0,0.23969600598017374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,128,0,1,float16,fp8,0,0.23853333791097006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,128,0,1,fp8,fp8,0,0.22875734170277914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,128,0,1,float16,float16,0,0.24342399835586548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,128,0,1,float16,fp8,0,0.24609599510828653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,128,0,1,fp8,fp8,0,0.24083733558654785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,128,0,1,float16,float16,0,0.24547199408213297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,128,0,1,float16,fp8,0,0.24678399165471396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,128,0,1,fp8,fp8,0,0.23971199989318848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,128,0,1,float16,float16,0,0.13638400038083395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,128,0,1,float16,fp8,0,0.1379680037498474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,128,0,1,fp8,fp8,0,0.13661332925160727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,128,0,1,float16,float16,0,0.1264586647351583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,128,0,1,float16,fp8,0,0.12648533781369528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,128,0,1,fp8,fp8,0,0.12124266227086385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,128,0,1,float16,float16,0,0.12774399916330972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,128,0,1,float16,fp8,0,0.12819199760754904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,128,0,1,fp8,fp8,0,0.12050132950146993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,128,0,1,float16,float16,0,0.1295199990272522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,128,0,1,float16,fp8,0,0.1304800013701121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,128,0,1,fp8,fp8,0,0.12780800461769104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,128,0,1,float16,float16,0,0.1301866670449575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,128,0,1,float16,fp8,0,0.13040000200271606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,128,0,1,fp8,fp8,0,0.1292746663093567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,128,0,1,float16,float16,0,0.07342933118343353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,128,0,1,float16,fp8,0,0.07228800157705943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,128,0,1,fp8,fp8,0,0.07281599938869476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,128,0,1,float16,float16,0,0.07048533360163371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,128,0,1,float16,fp8,0,0.06993600229422252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,128,0,1,fp8,fp8,0,0.06668266654014587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,128,0,1,float16,float16,0,0.07062933345635732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,128,0,1,float16,fp8,0,0.06952000161012013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,128,0,1,fp8,fp8,0,0.0683786670366923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,128,0,1,float16,float16,0,0.07188799977302551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,128,0,1,float16,fp8,0,0.07228800157705943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,128,0,1,fp8,fp8,0,0.07090133428573608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,128,0,1,float16,float16,0,0.07092266778151195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,128,0,1,float16,fp8,0,0.07259733478228252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,128,0,1,fp8,fp8,0,0.07018133501211803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,128,0,1,float16,float16,0,0.045594667394955955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,128,0,1,float16,fp8,0,0.04378133515516917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,128,0,1,fp8,fp8,0,0.04358399907747904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,128,0,1,float16,float16,0,0.04162133236726125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,128,0,1,float16,fp8,0,0.041877334316571556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,128,0,1,float16,float16,0,0.0420959989229838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,128,0,1,float16,fp8,0,0.04165333261092504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,128,0,1,fp8,fp8,0,0.042037333051363625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,128,0,1,fp8,fp8,0,0.04165333261092504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,128,0,1,float16,float16,0,0.043290664752324425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,128,0,1,float16,fp8,0,0.04365866879622141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,128,0,1,float16,float16,0,0.04394133388996124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,128,0,1,fp8,fp8,0,0.04399999976158142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,128,0,1,float16,fp8,0,0.04394133388996124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,128,0,1,fp8,fp8,0,0.04400533437728882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,128,0,1,float16,float16,0,0.029578665892283123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,128,0,1,fp8,fp8,0,0.029520000020662945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,128,0,1,float16,float16,0,0.029322666426499683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,128,0,1,float16,fp8,0,0.029557332396507263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,128,0,1,float16,fp8,0,0.029071999092896778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,128,0,1,fp8,fp8,0,0.029701332251230877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,128,0,1,float16,float16,0,0.029253333806991577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,128,0,1,float16,fp8,0,0.02932800104220708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,128,0,1,fp8,fp8,0,0.027514666318893433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,128,0,1,float16,float16,0,0.029418667157491047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,128,0,1,float16,fp8,0,0.02938133229811986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,128,0,1,fp8,fp8,0,0.029365333418051403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,128,0,1,float16,float16,0,0.02959999938805898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,128,0,1,float16,fp8,0,0.029418667157491047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,128,0,1,fp8,fp8,0,0.029391999046007793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,128,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,128,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,128,0,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,128,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,128,0,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,128,0,1,fp8,fp8,0,0.020655999581019085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,128,0,1,float16,float16,0,0.020970667401949566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,128,0,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,128,0,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,128,0,1,float16,float16,0,0.021525333325068157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,128,0,1,float16,fp8,0,0.020917333662509918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,128,0,1,float16,float16,0,0.021525333325068157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,128,0,1,fp8,fp8,0,0.021727999051411945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,128,0,1,float16,fp8,0,0.021253332495689392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,128,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,128,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,128,0,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,128,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,128,0,1,fp8,fp8,0,0.016704000532627106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,128,0,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,128,0,1,fp8,fp8,0,0.017664000391960144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,128,0,1,float16,float16,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,128,0,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,128,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,128,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,128,0,1,float16,float16,0,0.015765332927306492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,128,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,128,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,128,0,1,float16,float16,0,0.01553600033124288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,128,0,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,128,0,1,float16,float16,0,0.016789333273967106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,128,0,1,float16,float16,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,128,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,128,0,1,float16,fp8,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,128,0,1,fp8,fp8,0,0.01563199982047081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,128,0,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,128,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,128,0,1,float16,float16,0,0.19790933529535928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,128,0,1,float16,fp8,0,0.19789334138234457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,128,0,1,fp8,fp8,0,0.18959466616312662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,128,0,1,float16,float16,0,0.1990293264389038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,128,0,1,fp8,fp8,0,0.18942399819691977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,128,0,1,float16,fp8,0,0.19631999731063843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,128,0,1,float16,float16,0,0.20016533136367798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,128,0,1,float16,fp8,0,0.2011893391609192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,128,0,1,fp8,fp8,0,0.1956160068511963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,128,0,1,float16,float16,0,0.2011893391609192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,128,0,1,float16,fp8,0,0.2005013426144918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,128,0,1,fp8,fp8,0,0.19965332746505737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,128,0,1,float16,float16,0,0.10808533430099487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,128,0,1,float16,fp8,0,0.10815999905268352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,128,0,1,fp8,fp8,0,0.10961600144704182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,128,0,1,float16,float16,0,0.10440533359845479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,128,0,1,float16,fp8,0,0.10397332906723022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,128,0,1,fp8,fp8,0,0.10136533776919048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,128,0,1,float16,float16,0,0.1053493320941925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,128,0,1,float16,fp8,0,0.10356799761454265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,128,0,1,fp8,fp8,0,0.10157333811124165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,128,0,1,float16,float16,0,0.10568533341089885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,128,0,1,float16,fp8,0,0.1067573328812917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,128,0,1,fp8,fp8,0,0.10876267155011494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,128,0,1,float16,float16,0,0.10757866501808167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,128,0,1,float16,float16,0,0.06301333506902058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,128,0,1,fp8,fp8,0,0.1049013336499532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,128,0,1,float16,fp8,0,0.10731732845306396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,128,0,1,fp8,fp8,0,0.062074666221936546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,128,0,1,float16,float16,0,0.05981333553791046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,128,0,1,float16,fp8,0,0.060346667965253196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,128,0,1,float16,fp8,0,0.060831998785336815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,128,0,1,fp8,fp8,0,0.060122668743133545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,128,0,1,float16,fp8,0,0.06036800146102905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,128,0,1,float16,float16,0,0.06055466830730438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,128,0,1,fp8,fp8,0,0.057999998331069946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,128,0,1,float16,float16,0,0.06058133145173391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,128,0,1,float16,fp8,0,0.0621013343334198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,128,0,1,fp8,fp8,0,0.060720001657803856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,128,0,1,float16,float16,0,0.060789331793785095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,128,0,1,float16,fp8,0,0.060191998879114784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,128,0,1,float16,float16,0,0.037434667348861694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,128,0,1,fp8,fp8,0,0.06041066845258077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,128,0,1,float16,fp8,0,0.037632000943024956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,128,0,1,fp8,fp8,0,0.03751999884843826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,128,0,1,float16,float16,0,0.03600533306598663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,128,0,1,float16,fp8,0,0.035599999129772186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,128,0,1,fp8,fp8,0,0.03749333322048187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,128,0,1,float16,float16,0,0.037503999968369804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,128,0,1,float16,fp8,0,0.045312002301216125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,128,0,1,fp8,fp8,0,0.03732266773780187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,128,0,1,float16,float16,0,0.03738666574160258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,128,0,1,float16,fp8,0,0.03739733248949051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,128,0,1,fp8,fp8,0,0.037530665596326195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,128,0,1,float16,float16,0,0.0377813329299291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,128,0,1,float16,fp8,0,0.03791466603676478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,128,0,1,fp8,fp8,0,0.037632000943024956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,128,0,1,float16,float16,0,0.027221334477265675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,128,0,1,float16,fp8,0,0.02752000093460083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,128,0,1,fp8,fp8,0,0.025237334271272022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,128,0,1,float16,float16,0,0.025263999899228413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,128,0,1,float16,fp8,0,0.025413334369659424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,128,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,128,0,1,float16,float16,0,0.025445332129796345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,128,0,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,128,0,1,fp8,fp8,0,0.025237334271272022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,128,0,1,float16,float16,0,0.025434667865435284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,128,0,1,float16,fp8,0,0.027093333502610523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,128,0,1,fp8,fp8,0,0.025626666843891144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,128,0,1,float16,float16,0,0.025216000775496166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,128,0,1,float16,fp8,0,0.02738133321205775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,128,0,1,fp8,fp8,0,0.02515733242034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,128,0,1,float16,float16,0,0.02117866774400075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,128,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,128,0,1,fp8,fp8,0,0.01937599976857503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,128,0,1,float16,float16,0,0.01929066702723503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,128,0,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,128,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,128,0,1,float16,float16,0,0.019925333559513092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,128,0,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,128,0,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,128,0,1,float16,float16,0,0.01947733387351036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,128,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,128,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,128,0,1,float16,float16,0,0.019018666197856266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,128,0,1,float16,fp8,0,0.01960533360640208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,128,0,1,fp8,fp8,0,0.021429332594076794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,128,0,1,float16,float16,0,0.016522667060295742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,128,0,1,float16,fp8,0,0.01762666677435239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,128,0,1,fp8,fp8,0,0.01747200017174085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,128,0,1,float16,float16,0,0.017498667041460674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,128,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,128,0,1,float16,fp8,0,0.018826667219400406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,128,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,128,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,128,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,128,0,1,float16,float16,0,0.015658666690190632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,128,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,128,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,128,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,128,0,1,float16,fp8,0,0.01670933390657107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,128,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,128,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,128,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,128,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,128,0,1,fp8,fp8,0,0.015509333461523056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,128,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,128,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,128,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,0,0.16330132881800333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,0,0.1644000013669332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,128,0,1,fp8,fp8,0,0.15601600209871927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,0,0.164410670598348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,0,0.1646933356920878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,128,0,1,fp8,fp8,0,0.154448002576828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,0,0.16400532921155295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,0,0.1630826691786448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,128,0,1,fp8,fp8,0,0.15453867117563883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,0,0.16406400005022684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,0,0.16338133811950684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,128,0,1,fp8,fp8,0,0.15640532970428467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,0,0.08897067109743755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,0,0.0900320013364156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,128,0,1,fp8,fp8,0,0.08474666873613994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,0,0.08879466851552327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,0,0.08910399675369263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,128,0,1,fp8,fp8,0,0.08453866839408875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,0,0.08894933263460796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,0,0.08886933326721191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,128,0,1,fp8,fp8,0,0.085125337044398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,0,0.08918933073679607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,0,0.08898666501045227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,0,0.08737066388130188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,128,0,1,fp8,fp8,0,0.0851200024286906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,0,0.0516480008761088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,0,0.08756267031033833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,128,0,1,fp8,fp8,0,0.08481066425641377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,0,0.05186666548252106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,128,0,1,fp8,fp8,0,0.04994133114814758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,0,0.052144000927607216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,0,0.05239999790986379
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,128,0,1,fp8,fp8,0,0.05082666873931885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,0,0.05204799771308899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,0,0.05202133456865946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,128,0,1,fp8,fp8,0,0.05019199848175049
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,0,0.05186133086681366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,0,0.05231999854246775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,128,0,1,fp8,fp8,0,0.04978133241335551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,0,0.05171733101209005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,0,0.03161599983771642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,0,0.052042668064435325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,128,0,1,fp8,fp8,0,0.04975466430187225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,0,0.033413333197434746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,128,0,1,fp8,fp8,0,0.031498665610949196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,0,0.03339199970165888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,0,0.03341866781314214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,128,0,1,fp8,fp8,0,0.03156800071398417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,0,0.03340800106525421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,0,0.03350399931271871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,128,0,1,fp8,fp8,0,0.033557333052158356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,0,0.03528533379236857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,0,0.03544000039498011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,128,0,1,fp8,fp8,0,0.03363733241955439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,0,0.03442666679620743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,0,0.033717334270477295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,0,0.02518933266401291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,128,0,1,fp8,fp8,0,0.031471999982992806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,0,0.02513599892457326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,128,0,1,fp8,fp8,0,0.02554133286078771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,0,0.02508266766866048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,0,0.025402667621771496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,128,0,1,fp8,fp8,0,0.023445333043734234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,0,0.025098666548728943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,0,0.02516266703605652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,128,0,1,fp8,fp8,0,0.025349333882331848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,0,0.025536000728607178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,128,0,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,0,0.025098666548728943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,0,0.02516266703605652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,0,0.021583999196688335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,128,0,1,fp8,fp8,0,0.025402667621771496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,128,0,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,0,0.020469332734743755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,128,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,0,0.021589333812395733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,128,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,128,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,0,0.02309866746266683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,128,0,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,128,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,128,0,1,fp8,fp8,0,0.017808000246683758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,128,0,1,fp8,fp8,0,0.01599466676513354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,0,0.01775466650724411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,128,0,1,fp8,fp8,0,0.015930666277805965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,0,0.01823466643691063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,0,0.01580799991885821
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,128,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,128,0,1,fp8,fp8,0,9.366538365681967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,128,0,1,float16,float16,0,14.25442632039388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,128,0,1,float16,fp8,0,15.572981516520182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,128,0,1,float16,float16,0,15.370671590169271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,128,0,1,fp8,fp8,0,9.222586949666342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,128,0,1,float16,fp8,0,16.532928466796875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,128,0,1,float16,float16,0,17.157739003499348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,128,0,1,fp8,fp8,0,9.737728118896484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,128,0,1,float16,fp8,0,17.232245127360027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,128,0,1,float16,float16,0,15.950655619303385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,128,0,1,fp8,fp8,0,9.581573486328125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,128,0,1,float16,float16,0,8.874325434366861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,128,0,1,fp8,fp8,0,4.860618591308594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,128,0,1,float16,fp8,0,8.064805348714193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,128,0,1,float16,fp8,0,16.944469451904297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,128,0,1,float16,float16,0,8.365552266438803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,128,0,1,fp8,fp8,0,4.796794573465983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,128,0,1,float16,fp8,0,8.305221557617188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,128,0,1,float16,float16,0,8.459573109944662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,128,0,1,fp8,fp8,0,5.062117258707683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,128,0,1,float16,fp8,0,8.550538380940756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,128,0,1,float16,float16,0,7.697226842244466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,128,0,1,fp8,fp8,0,4.971455891927083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,128,0,1,float16,fp8,0,8.46664555867513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,128,0,1,fp8,fp8,0,4.981306711832683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,128,0,1,float16,float16,0,8.727349599202475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,128,0,1,float16,float16,0,4.414506594340007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,128,0,1,float16,fp8,0,7.887994766235352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,128,0,1,fp8,fp8,0,2.7315734227498374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,128,0,1,float16,fp8,0,4.508298556009929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,128,0,1,float16,fp8,0,3.7740745544433594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,128,0,1,float16,float16,0,3.993306795756022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,128,0,1,fp8,fp8,0,2.327594598134359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,128,0,1,fp8,fp8,0,2.370469411214193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,128,0,1,float16,float16,0,3.9355732599894204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,128,0,1,float16,fp8,0,3.1299308141072593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,128,0,1,float16,float16,0,3.8472372690836587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,128,0,1,fp8,fp8,0,2.288501262664795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,128,0,1,float16,fp8,0,4.407567977905273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,128,0,1,float16,float16,0,4.023578643798828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,128,0,1,float16,fp8,0,4.234106699625651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,128,0,1,float16,float16,0,1.9236639340718586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,128,0,1,fp8,fp8,0,2.369061311086019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,128,0,1,float16,fp8,0,1.4670027097066243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,128,0,1,fp8,fp8,0,1.285914659500122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,128,0,1,float16,float16,0,1.5530719757080078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,128,0,1,float16,fp8,0,1.4755253791809082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,128,0,1,fp8,fp8,0,1.7804746627807617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,128,0,1,float16,float16,0,1.4650293986002605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,128,0,1,fp8,fp8,0,1.2516746520996094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,128,0,1,float16,fp8,0,1.8255093892415364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,128,0,1,float16,float16,0,1.4304265975952148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,128,0,1,float16,fp8,0,1.4591840108235676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,128,0,1,fp8,fp8,0,1.2526666323343914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,128,0,1,float16,float16,0,1.44049072265625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,128,0,1,float16,fp8,0,1.7437599500020344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,128,0,1,fp8,fp8,0,1.2589386304219563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,128,0,1,fp8,fp8,0,5.304922739664714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,128,0,1,float16,float16,0,8.796607971191406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,128,0,1,float16,fp8,0,10.057498931884766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,128,0,1,float16,float16,0,9.908096313476562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,128,0,1,fp8,fp8,0,5.57370122273763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,128,0,1,float16,fp8,0,8.709871927897135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,128,0,1,float16,float16,0,10.092010498046875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,128,0,1,float16,fp8,0,9.60148811340332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,128,0,1,fp8,fp8,0,6.013013203938802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,128,0,1,float16,float16,0,9.65831438700358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,128,0,1,float16,float16,0,5.238373438517253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,128,0,1,fp8,fp8,0,6.477541605631511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,128,0,1,fp8,fp8,0,3.0041332244873047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,128,0,1,float16,fp8,0,4.733877182006836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,128,0,1,float16,fp8,0,10.022288004557291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,128,0,1,float16,float16,0,5.055317242940267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,128,0,1,fp8,fp8,0,2.57205867767334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,128,0,1,float16,fp8,0,5.008853276570638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,128,0,1,float16,float16,0,5.623392105102539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,128,0,1,fp8,fp8,0,2.576474666595459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,128,0,1,float16,fp8,0,5.027578671773274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,128,0,1,float16,float16,0,4.29695987701416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,128,0,1,fp8,fp8,0,2.6541120211283364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,128,0,1,float16,fp8,0,4.997541427612305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,128,0,1,float16,float16,0,5.26362673441569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,128,0,1,fp8,fp8,0,2.599386692047119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,128,0,1,float16,fp8,0,4.892218589782715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,128,0,1,float16,float16,0,2.348192056020101
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,128,0,1,float16,fp8,0,1.695642630259196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,128,0,1,fp8,fp8,0,1.43503999710083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,128,0,1,float16,float16,0,2.107914606730143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,128,0,1,float16,fp8,0,1.5911413828531902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,128,0,1,fp8,fp8,0,1.3790399233500164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,128,0,1,float16,float16,0,2.2248427073160806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,128,0,1,float16,fp8,0,1.5938453674316406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,128,0,1,fp8,fp8,0,1.3838666280110676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,128,0,1,float16,fp8,0,1.5862773259480794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,128,0,1,float16,float16,0,1.6600640614827473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,128,0,1,fp8,fp8,0,1.3871946334838867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,128,0,1,float16,float16,0,1.5936959584554036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,128,0,1,float16,float16,0,0.9177227020263672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,128,0,1,float16,fp8,0,1.0270826816558838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,128,0,1,fp8,fp8,0,1.436463991800944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,128,0,1,float16,fp8,0,2.1078826586405435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,128,0,1,fp8,fp8,0,0.8171892960866293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,128,0,1,float16,float16,0,0.8971467018127441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,128,0,1,float16,fp8,0,0.912874698638916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,128,0,1,fp8,fp8,0,0.7882506847381592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,128,0,1,float16,float16,0,0.897589365641276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,128,0,1,float16,fp8,0,0.8977333704630533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,128,0,1,fp8,fp8,0,0.7932053407033285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,128,0,1,float16,float16,0,0.8995306491851807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,128,0,1,float16,fp8,0,0.9024266401926676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,128,0,1,fp8,fp8,0,0.803178628285726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,128,0,1,float16,float16,0,0.912442684173584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,128,0,1,float16,fp8,0,0.9011733531951904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,128,0,1,fp8,fp8,0,0.7992160320281982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,128,0,1,fp8,fp8,0,4.050607999165853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,128,0,1,float16,float16,0,6.406522750854492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,128,0,1,float16,float16,0,5.841333389282227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,128,0,1,float16,fp8,0,6.556442896525065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,128,0,1,fp8,fp8,0,3.9817225138346353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,128,0,1,float16,fp8,0,6.415306727091472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,128,0,1,float16,float16,0,5.863594690958659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,128,0,1,float16,fp8,0,7.009685516357422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,128,0,1,fp8,fp8,0,3.935797373453776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,128,0,1,float16,float16,0,3.2424532572428384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,128,0,1,fp8,fp8,0,4.4688161214192705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,128,0,1,float16,fp8,0,6.5790665944417315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,128,0,1,float16,float16,0,6.974527994791667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,128,0,1,fp8,fp8,0,1.9776852925618489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,128,0,1,float16,fp8,0,3.503925323486328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,128,0,1,float16,float16,0,2.5917867024739585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,128,0,1,float16,fp8,0,3.5370505650838218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,128,0,1,fp8,fp8,0,2.113407929738363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,128,0,1,float16,fp8,0,2.1781652768452964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,128,0,1,float16,float16,0,3.2747414906819663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,128,0,1,fp8,fp8,0,2.0174080530802407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,128,0,1,float16,float16,0,3.545743942260742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,128,0,1,fp8,fp8,0,1.91377592086792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,128,0,1,float16,fp8,0,3.109877268473307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,128,0,1,float16,float16,0,3.260688145955404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,128,0,1,float16,float16,0,1.2677226861317952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,128,0,1,fp8,fp8,0,1.9520692825317383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,128,0,1,float16,fp8,0,3.558863957722982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,128,0,1,float16,fp8,0,1.646341323852539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,128,0,1,fp8,fp8,0,1.2336106300354004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,128,0,1,float16,float16,0,1.190677324930827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,128,0,1,float16,fp8,0,1.6101813316345215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,128,0,1,fp8,fp8,0,1.0218133131663005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,128,0,1,float16,float16,0,1.1751413345336914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,128,0,1,float16,fp8,0,1.205082654953003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,128,0,1,fp8,fp8,0,1.0281706651051838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,128,0,1,float16,float16,0,1.1780693531036377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,128,0,1,float16,fp8,0,1.2561226685841878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,128,0,1,fp8,fp8,0,1.10316268603007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,128,0,1,float16,float16,0,1.2023946444193523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,128,0,1,float16,fp8,0,1.1882346471150715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,128,0,1,float16,float16,0,0.6896373430887858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,128,0,1,fp8,fp8,0,1.2723946571350098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,128,0,1,float16,fp8,0,0.7102346420288086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,128,0,1,float16,float16,0,0.669263998667399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,128,0,1,fp8,fp8,0,0.6452373266220093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,128,0,1,float16,fp8,0,0.677903970082601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,128,0,1,fp8,fp8,0,0.621392011642456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,128,0,1,float16,float16,0,0.6750240325927734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,128,0,1,float16,fp8,0,0.6714719931284586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,128,0,1,fp8,fp8,0,0.5978240172068278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,128,0,1,float16,float16,0,0.6834399700164795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,128,0,1,fp8,fp8,0,0.5993280013402303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,128,0,1,float16,fp8,0,0.6823146343231201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,128,0,1,float16,float16,0,0.6803146998087565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,128,0,1,float16,fp8,0,0.6877439816792806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,128,0,1,fp8,fp8,0,0.602399984995524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,128,0,1,fp8,fp8,0,5.577642440795898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,128,0,1,float16,fp8,0,7.967221577962239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,128,0,1,float16,float16,0,8.395530700683594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,128,0,1,float16,float16,0,8.843023935953775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,128,0,1,fp8,fp8,0,4.880181312561035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,128,0,1,float16,fp8,0,9.025018692016602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,128,0,1,float16,float16,0,9.074613571166992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,128,0,1,float16,fp8,0,9.236533482869467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,128,0,1,fp8,fp8,0,5.169381459554036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,128,0,1,float16,float16,0,4.667370796203613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,128,0,1,float16,float16,0,10.22978655497233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,128,0,1,fp8,fp8,0,6.186858495076497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,128,0,1,float16,fp8,0,9.30454953511556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,128,0,1,fp8,fp8,0,2.9489758809407554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,128,0,1,float16,fp8,0,5.160698572794597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,128,0,1,float16,float16,0,4.4613386789957685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,128,0,1,fp8,fp8,0,2.685296058654785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,128,0,1,float16,fp8,0,3.8880319595336914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,128,0,1,float16,float16,0,3.0831359227498374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,128,0,1,float16,fp8,0,4.409877459208171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,128,0,1,fp8,fp8,0,2.749077479044596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,128,0,1,float16,float16,0,4.697194735209147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,128,0,1,float16,fp8,0,3.833653450012207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,128,0,1,fp8,fp8,0,2.503701368967692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,128,0,1,float16,float16,0,4.678981463114421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,128,0,1,float16,float16,0,1.7674293518066406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,128,0,1,fp8,fp8,0,2.516032059987386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,128,0,1,float16,fp8,0,1.9261280695597331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,128,0,1,float16,fp8,0,4.456128120422363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,128,0,1,fp8,fp8,0,1.3750559488932292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,128,0,1,float16,float16,0,1.4902772903442383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,128,0,1,float16,fp8,0,2.003173351287842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,128,0,1,fp8,fp8,0,1.4051413536071777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,128,0,1,float16,float16,0,1.4924160639444988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,128,0,1,float16,fp8,0,1.5029652913411458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,128,0,1,fp8,fp8,0,1.378229300181071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,128,0,1,float16,float16,0,1.8622612953186035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,128,0,1,fp8,fp8,0,1.3570772806803386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,128,0,1,float16,fp8,0,2.00490665435791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,128,0,1,float16,float16,0,0.9333066940307617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,128,0,1,float16,float16,0,2.0433066685994468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,128,0,1,fp8,fp8,0,1.3220160007476807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,128,0,1,float16,fp8,0,1.9797226587931316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,128,0,1,float16,fp8,0,0.9375999768575033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,128,0,1,float16,float16,0,0.8185866673787435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,128,0,1,fp8,fp8,0,0.9082880020141602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,128,0,1,float16,fp8,0,0.8816800117492676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,128,0,1,fp8,fp8,0,0.7199146747589111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,128,0,1,float16,float16,0,0.821733315785726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,128,0,1,float16,fp8,0,0.828704039255778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,128,0,1,fp8,fp8,0,0.7205279668172201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,128,0,1,float16,float16,0,0.8239946365356445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,128,0,1,float16,fp8,0,0.8371626536051432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,128,0,1,fp8,fp8,0,0.7880160013834635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,128,0,1,float16,float16,0,0.8222613334655762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,128,0,1,float16,float16,0,0.4979626735051473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,128,0,1,float16,fp8,0,0.8305973211924235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,128,0,1,float16,fp8,0,0.5028853416442871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,128,0,1,fp8,fp8,0,0.9915733337402344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,128,0,1,fp8,fp8,0,0.4501013358434041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,128,0,1,float16,float16,0,0.5434720118840536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,128,0,1,float16,fp8,0,0.47540799776713055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,128,0,1,fp8,fp8,0,0.42874133586883545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,128,0,1,float16,float16,0,0.4822346766789754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,128,0,1,fp8,fp8,0,0.42642664909362793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,128,0,1,float16,fp8,0,0.48004265626271564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,128,0,1,float16,float16,0,0.48018133640289307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,128,0,1,float16,fp8,0,0.4851573308308919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,128,0,1,fp8,fp8,0,0.43032534917195636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,128,0,1,float16,float16,0,0.48636265595753986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,128,0,1,float16,fp8,0,0.48632001876831055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,128,0,1,fp8,fp8,0,0.4366346597671509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,128,0,1,fp8,fp8,0,2.9813013076782227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,128,0,1,float16,fp8,0,4.946394602457683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,128,0,1,float16,float16,0,5.189237276713054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,128,0,1,float16,float16,0,5.711301167805989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,128,0,1,float16,fp8,0,4.328895886739095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,128,0,1,fp8,fp8,0,3.118133227030436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,128,0,1,float16,fp8,0,5.192293485005696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,128,0,1,float16,float16,0,5.179098765055339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,128,0,1,fp8,fp8,0,3.0134452184041343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,128,0,1,float16,float16,0,5.314085324605306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,128,0,1,float16,float16,0,1.8551093737284343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,128,0,1,fp8,fp8,0,3.4308160146077475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,128,0,1,float16,fp8,0,5.539610544840495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,128,0,1,fp8,fp8,0,1.6604587237040203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,128,0,1,float16,fp8,0,2.5186773935953775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,128,0,1,float16,float16,0,2.2217653592427573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,128,0,1,fp8,fp8,0,1.5363200505574544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,128,0,1,float16,fp8,0,2.0673599243164062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,128,0,1,float16,float16,0,2.2140480677286782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,128,0,1,float16,fp8,0,2.3752800623575845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,128,0,1,fp8,fp8,0,1.5423253377278645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,128,0,1,float16,float16,0,2.597760041554769
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,128,0,1,fp8,fp8,0,1.650426705678304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,128,0,1,float16,fp8,0,2.4642133712768555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,128,0,1,float16,float16,0,1.7810773849487305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,128,0,1,float16,float16,0,0.9900320370992025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,128,0,1,fp8,fp8,0,1.6725120544433594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,128,0,1,float16,fp8,0,1.2149919668833415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,128,0,1,float16,fp8,0,2.494383970896403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,128,0,1,fp8,fp8,0,1.0657386779785156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,128,0,1,float16,float16,0,0.9970933596293131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,128,0,1,float16,fp8,0,1.0139466921488445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,128,0,1,fp8,fp8,0,0.8215306599934896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,128,0,1,float16,float16,0,0.9469119707743326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,128,0,1,float16,fp8,0,0.9847626686096191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,128,0,1,fp8,fp8,0,0.8233599662780762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,128,0,1,float16,float16,0,0.9409279823303223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,128,0,1,float16,fp8,0,0.9479146798451742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,128,0,1,fp8,fp8,0,0.8323093255360922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,128,0,1,float16,float16,0,0.9634400208791097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,128,0,1,float16,fp8,0,1.0670080184936523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,128,0,1,float16,float16,0,0.5571573177973429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,128,0,1,fp8,fp8,0,0.8426026503245035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,128,0,1,float16,fp8,0,0.725167989730835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,128,0,1,fp8,fp8,0,0.4933226505915324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,128,0,1,float16,float16,0,0.5244160095850626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,128,0,1,fp8,fp8,0,0.46439464886983234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,128,0,1,float16,float16,0,0.5276906490325928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,128,0,1,float16,fp8,0,0.6885440349578857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,128,0,1,float16,fp8,0,0.5260213216145834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,128,0,1,fp8,fp8,0,0.4698186715443929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,128,0,1,float16,float16,0,0.5254666805267334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,128,0,1,fp8,fp8,0,0.47088531653086346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,128,0,1,float16,fp8,0,0.5309439897537231
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,128,0,1,float16,float16,0,0.5297386646270752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,128,0,1,float16,float16,0,0.3288480043411255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,128,0,1,float16,fp8,0,0.5402506589889526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,128,0,1,fp8,fp8,0,0.4746239980061849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,128,0,1,float16,fp8,0,0.3328853249549866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,128,0,1,fp8,fp8,0,0.30001600583394367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,128,0,1,float16,float16,0,0.31174933910369873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,128,0,1,float16,fp8,0,0.3118666609128316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,128,0,1,fp8,fp8,0,0.28329600890477497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,128,0,1,float16,float16,0,0.3122719923655192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,128,0,1,fp8,fp8,0,0.285263995329539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,128,0,1,float16,fp8,0,0.3130720059076945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,128,0,1,float16,float16,0,0.3138773242632548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,128,0,1,fp8,fp8,0,0.2858293255170186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,128,0,1,float16,fp8,0,0.3184533317883809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,128,0,1,float16,float16,0,0.3179413278897603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,128,0,1,fp8,fp8,0,0.2924693425496419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,128,0,1,float16,fp8,0,0.3187626600265503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,128,0,1,fp8,fp8,0,3.2911198933919272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,128,0,1,float16,fp8,0,4.228170712788899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,128,0,1,float16,float16,0,4.789114634195964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,128,0,1,float16,float16,0,4.876778602600098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,128,0,1,fp8,fp8,0,3.098618825276693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,128,0,1,float16,fp8,0,4.7766774495442705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,128,0,1,float16,float16,0,4.6786454518636065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,128,0,1,float16,fp8,0,5.143226623535156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,128,0,1,fp8,fp8,0,3.1211681365966797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,128,0,1,float16,float16,0,5.356688181559245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,128,0,1,float16,float16,0,2.158928076426188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,128,0,1,fp8,fp8,0,3.4935681025187173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,128,0,1,float16,fp8,0,4.695258776346843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,128,0,1,fp8,fp8,0,1.720245361328125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,128,0,1,float16,fp8,0,2.17411740620931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,128,0,1,float16,float16,0,1.9085440635681152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,128,0,1,float16,fp8,0,1.847050666809082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,128,0,1,fp8,fp8,0,1.557653268178304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,128,0,1,float16,float16,0,2.070367972056071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,128,0,1,fp8,fp8,0,1.5652373631795247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,128,0,1,float16,fp8,0,2.2764053344726562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,128,0,1,float16,float16,0,1.7903626759847004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,128,0,1,fp8,fp8,0,1.5798293749491374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,128,0,1,float16,fp8,0,2.2892746925354004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,128,0,1,float16,float16,0,2.442954699198405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,128,0,1,fp8,fp8,0,1.6064799626668294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,128,0,1,float16,fp8,0,2.0834827423095703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,128,0,1,float16,float16,0,1.2563199996948242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,128,0,1,float16,fp8,0,1.2073760032653809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,128,0,1,fp8,fp8,0,0.8993546962738037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,128,0,1,float16,float16,0,1.1174399852752686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,128,0,1,fp8,fp8,0,0.8599253495534261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,128,0,1,float16,fp8,0,1.3942346572875977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,128,0,1,float16,float16,0,0.9394079844156901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,128,0,1,float16,fp8,0,0.9414933522542318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,128,0,1,fp8,fp8,0,0.8200159867604574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,128,0,1,float16,float16,0,1.2155040105183919
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,128,0,1,float16,fp8,0,0.9432746569315592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,128,0,1,fp8,fp8,0,0.830842653910319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,128,0,1,float16,float16,0,0.9487946828206381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,128,0,1,float16,float16,0,0.5411306619644165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,128,0,1,float16,fp8,0,1.0222933292388916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,128,0,1,fp8,fp8,0,0.8743039766947428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,128,0,1,float16,fp8,0,0.551695982615153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,128,0,1,fp8,fp8,0,0.48842668533325195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,128,0,1,fp8,fp8,0,0.4479893445968628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,128,0,1,float16,fp8,0,0.5064320166905721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,128,0,1,float16,float16,0,0.654917319615682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,128,0,1,float16,float16,0,0.5075893402099609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,128,0,1,fp8,fp8,0,0.4511573314666748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,128,0,1,float16,fp8,0,0.5094240109125773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,128,0,1,float16,float16,0,0.6598933140436808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,128,0,1,float16,fp8,0,0.5158933401107788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,128,0,1,fp8,fp8,0,0.45555198192596436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,128,0,1,float16,float16,0,0.5150133371353149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,128,0,1,float16,fp8,0,0.5272906621297201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,128,0,1,float16,float16,0,0.3101759950319926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,128,0,1,fp8,fp8,0,0.4628320137659709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,128,0,1,float16,fp8,0,0.3132586677869161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,128,0,1,fp8,fp8,0,0.2853226661682129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,128,0,1,float16,float16,0,0.2863626678784688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,128,0,1,float16,fp8,0,0.2887733379999797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,128,0,1,fp8,fp8,0,0.26105066140492755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,128,0,1,float16,float16,0,0.2898400028546651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,128,0,1,float16,fp8,0,0.28863465785980225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,128,0,1,fp8,fp8,0,0.26377065976460773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,128,0,1,float16,float16,0,0.29257599512736004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,128,0,1,float16,fp8,0,0.29315199454625446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,128,0,1,fp8,fp8,0,0.265882670879364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,128,0,1,float16,float16,0,0.29815467198689777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,128,0,1,float16,fp8,0,0.29950932661692303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,128,0,1,float16,float16,0,0.19314666589101157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,128,0,1,fp8,fp8,0,0.2694080074628194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,128,0,1,float16,fp8,0,0.19545066356658936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,128,0,1,fp8,fp8,0,0.17964265743891397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,128,0,1,float16,float16,0,0.17922133207321167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,128,0,1,float16,fp8,0,0.18405866622924805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,128,0,1,fp8,fp8,0,0.1682186722755432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,128,0,1,float16,float16,0,0.17994133631388345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,128,0,1,float16,fp8,0,0.18103466431299844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,128,0,1,fp8,fp8,0,0.16833599408467612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,128,0,1,float16,float16,0,0.18021865685780844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,128,0,1,float16,fp8,0,0.1844373345375061
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,128,0,1,fp8,fp8,0,0.16831467549006143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,128,0,1,float16,float16,0,0.1803306738535563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,128,0,1,fp8,fp8,0,0.16981865962346396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,128,0,1,float16,fp8,0,0.1851466695467631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,128,0,1,float16,float16,0,2.237178643544515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,128,0,1,fp8,fp8,0,1.9814720153808594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,128,0,1,float16,fp8,0,2.234341303507487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,128,0,1,float16,fp8,0,2.352565288543701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,128,0,1,fp8,fp8,0,1.9935146967569988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,128,0,1,float16,float16,0,2.6152159372965493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,128,0,1,fp8,fp8,0,2.015061378479004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,128,0,1,float16,float16,0,2.7569119135538735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,128,0,1,float16,fp8,0,2.6988906860351562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,128,0,1,float16,float16,0,2.823205312093099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,128,0,1,float16,float16,0,1.5105867385864258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,128,0,1,float16,fp8,0,2.527440071105957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,128,0,1,fp8,fp8,0,2.054218610127767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,128,0,1,float16,fp8,0,1.5484800338745117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,128,0,1,fp8,fp8,0,1.1333813667297363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,128,0,1,float16,fp8,0,1.166591962178548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,128,0,1,float16,float16,0,1.3262773354848225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,128,0,1,fp8,fp8,0,1.0120480060577393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,128,0,1,float16,float16,0,1.167301336924235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,128,0,1,fp8,fp8,0,1.0212746461232503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,128,0,1,float16,fp8,0,1.1998613675435383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,128,0,1,float16,float16,0,1.1643839677174885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,128,0,1,fp8,fp8,0,1.0312693119049072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,128,0,1,float16,fp8,0,1.2485973040262859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,128,0,1,float16,fp8,0,1.1885173320770264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,128,0,1,float16,float16,0,1.1964746316274006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,128,0,1,float16,float16,0,0.7122560342152914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,128,0,1,float16,fp8,0,0.6713333129882812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,128,0,1,fp8,fp8,0,0.7339893182118734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,128,0,1,fp8,fp8,0,1.0483626524607341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,128,0,1,float16,float16,0,0.6170239845911661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,128,0,1,fp8,fp8,0,0.5392106771469116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,128,0,1,float16,fp8,0,0.6119573513666788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,128,0,1,float16,float16,0,0.6149386564890543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,128,0,1,float16,fp8,0,0.6200639804204305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,128,0,1,fp8,fp8,0,0.5416160027186075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,128,0,1,float16,float16,0,0.6199573278427124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,128,0,1,float16,fp8,0,0.6207520167032877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,128,0,1,fp8,fp8,0,0.5496533314387003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,128,0,1,float16,float16,0,0.6253866751988729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,128,0,1,fp8,fp8,0,0.557914654413859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,128,0,1,float16,fp8,0,0.6323839823404948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,128,0,1,float16,float16,0,0.36556267738342285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,128,0,1,float16,fp8,0,0.3700266679128011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,128,0,1,fp8,fp8,0,0.3306559920310974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,128,0,1,float16,float16,0,0.33341864744822186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,128,0,1,float16,fp8,0,0.33690134684244794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,128,0,1,fp8,fp8,0,0.3004159927368164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,128,0,1,float16,float16,0,0.33634666601816815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,128,0,1,float16,fp8,0,0.3365653355916341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,128,0,1,fp8,fp8,0,0.30345600843429565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,128,0,1,float16,float16,0,0.34169065952301025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,128,0,1,float16,fp8,0,0.34133867422739667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,128,0,1,fp8,fp8,0,0.3067946632703145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,128,0,1,float16,float16,0,0.3476693232854207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,128,0,1,float16,fp8,0,0.346346656481425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,128,0,1,float16,float16,0,0.2143253286679586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,128,0,1,fp8,fp8,0,0.31033066908518475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,128,0,1,float16,fp8,0,0.2192373275756836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,128,0,1,fp8,fp8,0,0.19645333290100098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,128,0,1,float16,float16,0,0.19452265898386636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,128,0,1,float16,fp8,0,0.19548267126083374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,128,0,1,fp8,fp8,0,0.17427200078964233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,128,0,1,float16,float16,0,0.1932106614112854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,128,0,1,float16,fp8,0,0.19166400035222372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,128,0,1,fp8,fp8,0,0.1776533325513204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,128,0,1,float16,float16,0,0.19497599204381308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,128,0,1,float16,fp8,0,0.19594667355219522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,128,0,1,fp8,fp8,0,0.17851199706395468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,128,0,1,float16,float16,0,0.20062400897343954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,128,0,1,float16,fp8,0,0.20409067471822104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,128,0,1,float16,float16,0,0.13302399714787802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,128,0,1,fp8,fp8,0,0.1872373421986898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,128,0,1,float16,fp8,0,0.13662933309872946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,128,0,1,fp8,fp8,0,0.12845333417256674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,128,0,1,float16,float16,0,0.12738666931788126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,128,0,1,fp8,fp8,0,0.12037866314252217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,128,0,1,float16,fp8,0,0.12924266854921976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,128,0,1,float16,float16,0,0.13131200273831686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,128,0,1,float16,fp8,0,0.12870933612187704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,128,0,1,fp8,fp8,0,0.11980266372362773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,128,0,1,float16,float16,0,0.12812800208727518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,128,0,1,float16,fp8,0,0.12993599971135458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,128,0,1,fp8,fp8,0,0.11979200442632039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,128,0,1,float16,float16,0,0.12827733159065247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,128,0,1,float16,fp8,0,0.1302826702594757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,128,0,1,fp8,fp8,0,0.12078932921091716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,128,0,1,fp8,fp8,0,2.1943467458089194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,128,0,1,float16,float16,0,2.547760009765625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,128,0,1,float16,fp8,0,2.6854613622029624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,128,0,1,float16,float16,0,2.5370240211486816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,128,0,1,float16,fp8,0,2.593712011973063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,128,0,1,fp8,fp8,0,2.209130605061849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,128,0,1,fp8,fp8,0,2.2614453633626304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,128,0,1,float16,float16,0,2.5948959986368814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,128,0,1,float16,fp8,0,2.9783414204915366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,128,0,1,float16,float16,0,2.8878186543782554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,128,0,1,float16,float16,0,1.5196587244669597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,128,0,1,fp8,fp8,0,2.2820372581481934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,128,0,1,float16,fp8,0,2.6835254033406577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,128,0,1,float16,fp8,0,1.4179946581522624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,128,0,1,fp8,fp8,0,1.262170632680257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,128,0,1,float16,float16,0,1.2499199708302815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,128,0,1,float16,fp8,0,1.2531999746958415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,128,0,1,fp8,fp8,0,1.1063946882883708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,128,0,1,float16,float16,0,1.2552106380462646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,128,0,1,fp8,fp8,0,1.1648159821828206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,128,0,1,float16,fp8,0,1.2659786542256672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,128,0,1,float16,float16,0,1.2821919918060303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,128,0,1,float16,fp8,0,1.2947306632995605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,128,0,1,fp8,fp8,0,1.1270293394724529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,128,0,1,float16,float16,0,1.292693297068278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,128,0,1,float16,float16,0,0.7197173436482748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,128,0,1,float16,fp8,0,1.3967520395914714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,128,0,1,float16,fp8,0,0.7344799836476644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,128,0,1,fp8,fp8,0,1.1536533037821453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,128,0,1,fp8,fp8,0,0.7298826376597086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,128,0,1,float16,float16,0,0.6571413278579712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,128,0,1,fp8,fp8,0,0.5762240091959635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,128,0,1,float16,fp8,0,0.6553546587626139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,128,0,1,float16,float16,0,0.658026655515035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,128,0,1,float16,fp8,0,0.6612853209177653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,128,0,1,fp8,fp8,0,0.581551988919576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,128,0,1,float16,float16,0,0.6609493494033813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,128,0,1,fp8,fp8,0,0.5886720021565756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,128,0,1,float16,fp8,0,0.6653653383255005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,128,0,1,float16,float16,0,0.673205296198527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,128,0,1,float16,fp8,0,0.6809386412302653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,128,0,1,float16,float16,0,0.38329601287841797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,128,0,1,float16,fp8,0,0.3915040095647176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,128,0,1,fp8,fp8,0,0.35438934961954754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,128,0,1,fp8,fp8,0,0.6009493271509806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,128,0,1,float16,float16,0,0.3484213352203369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,128,0,1,float16,fp8,0,0.3496853510538737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,128,0,1,fp8,fp8,0,0.3129439949989319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,128,0,1,float16,float16,0,0.3551679849624634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,128,0,1,float16,fp8,0,0.35188265641530353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,128,0,1,fp8,fp8,0,0.3145119945208232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,128,0,1,float16,float16,0,0.3543200095494588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,128,0,1,float16,fp8,0,0.35763200124104816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,128,0,1,fp8,fp8,0,0.3179733355840047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,128,0,1,float16,float16,0,0.36486931641896564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,128,0,1,float16,fp8,0,0.3636853297551473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,128,0,1,fp8,fp8,0,0.3251360058784485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,128,0,1,float16,float16,0,0.21594132979710898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,128,0,1,float16,fp8,0,0.22167466084162393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,128,0,1,fp8,fp8,0,0.20025600989659628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,128,0,1,float16,float16,0,0.19177599747975668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,128,0,1,float16,fp8,0,0.1920586625734965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,128,0,1,fp8,fp8,0,0.17799999316533408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,128,0,1,float16,float16,0,0.19562133153279623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,128,0,1,float16,fp8,0,0.1966186761856079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,128,0,1,fp8,fp8,0,0.18012267351150513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,128,0,1,float16,float16,0,0.19686933358510336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,128,0,1,float16,fp8,0,0.19876267512639365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,128,0,1,fp8,fp8,0,0.18317333857218424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,128,0,1,float16,float16,0,0.20113599300384521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,128,0,1,float16,fp8,0,0.20320000251134238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,128,0,1,float16,float16,0,0.13075733184814453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,128,0,1,fp8,fp8,0,0.18556267023086548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,128,0,1,float16,fp8,0,0.1302293340365092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,128,0,1,fp8,fp8,0,0.12354133526484172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,128,0,1,float16,float16,0,0.11795733372370402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,128,0,1,float16,fp8,0,0.11762666702270508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,128,0,1,fp8,fp8,0,0.10521066188812256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,128,0,1,float16,float16,0,0.11753599842389424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,128,0,1,float16,fp8,0,0.11990400155385335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,128,0,1,fp8,fp8,0,0.10729599992434184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,128,0,1,float16,float16,0,0.11872000495592754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,128,0,1,float16,fp8,0,0.11864533027013142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,128,0,1,fp8,fp8,0,0.10790933171908061
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,128,0,1,float16,float16,0,0.11912533640861511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,128,0,1,float16,fp8,0,0.12126933534940083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,128,0,1,fp8,fp8,0,0.11030399799346924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,128,0,1,float16,float16,0,0.08342933654785156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,128,0,1,float16,fp8,0,0.08689600229263306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,128,0,1,fp8,fp8,0,0.08098666866620381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,128,0,1,float16,float16,0,0.08224000036716461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,128,0,1,float16,fp8,0,0.08413867155710857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,128,0,1,fp8,fp8,0,0.07743999858697255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,128,0,1,float16,float16,0,0.0846453309059143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,128,0,1,float16,fp8,0,0.08338666955629985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,128,0,1,fp8,fp8,0,0.07675733168919881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,128,0,1,float16,float16,0,0.08308266599973042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,128,0,1,float16,fp8,0,0.08313066760698955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,128,0,1,fp8,fp8,0,0.07681599756081899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,128,0,1,float16,float16,0,0.08343999584515889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,128,0,1,float16,fp8,0,0.08496000369389851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,128,0,1,fp8,fp8,0,0.07795199751853943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,128,0,1,fp8,fp8,0,1.4978933334350586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,128,0,1,float16,float16,0,1.6983413696289062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,128,0,1,float16,fp8,0,1.710037390391032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,128,0,1,float16,float16,0,1.7146239280700684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,128,0,1,fp8,fp8,0,1.5152907371520996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,128,0,1,float16,fp8,0,1.720421314239502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,128,0,1,fp8,fp8,0,1.5592212677001953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,128,0,1,float16,float16,0,1.7585493723551433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,128,0,1,float16,fp8,0,1.7579466501871746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,128,0,1,float16,float16,0,0.9749546845753988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,128,0,1,float16,float16,0,1.7571412722269695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,128,0,1,fp8,fp8,0,1.5641973813374836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,128,0,1,float16,fp8,0,1.760250727335612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,128,0,1,float16,fp8,0,0.9835946559906006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,128,0,1,fp8,fp8,0,0.8790026505788168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,128,0,1,float16,float16,0,0.8536960283915201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,128,0,1,float16,fp8,0,0.9303680260976156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,128,0,1,fp8,fp8,0,0.7972799936930338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,128,0,1,float16,float16,0,0.8620213667551676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,128,0,1,float16,fp8,0,0.8675946394602457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,128,0,1,fp8,fp8,0,0.7636000315348307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,128,0,1,float16,float16,0,0.8769493103027344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,128,0,1,fp8,fp8,0,0.7720053195953369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,128,0,1,float16,fp8,0,0.8788586457570394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,128,0,1,float16,float16,0,0.8885707060496012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,128,0,1,float16,float16,0,0.5028479894002279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,128,0,1,float16,fp8,0,0.5070399840672811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,128,0,1,fp8,fp8,0,0.7946133613586426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,128,0,1,float16,fp8,0,0.8965653578440348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,128,0,1,fp8,fp8,0,0.4603360096613566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,128,0,1,float16,float16,0,0.4457813501358032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,128,0,1,float16,fp8,0,0.4506880044937134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,128,0,1,fp8,fp8,0,0.39893333117167157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,128,0,1,float16,float16,0,0.44646398226420086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,128,0,1,float16,fp8,0,0.4511733452479045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,128,0,1,fp8,fp8,0,0.40166934331258136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,128,0,1,float16,float16,0,0.45333866278330487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,128,0,1,float16,fp8,0,0.45956798394521076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,128,0,1,fp8,fp8,0,0.40670935312906903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,128,0,1,float16,float16,0,0.4654506842295329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,128,0,1,float16,fp8,0,0.4684799909591675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,128,0,1,float16,float16,0,0.27031999826431274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,128,0,1,fp8,fp8,0,0.41621867815653485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,128,0,1,float16,fp8,0,0.27691733837127686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,128,0,1,fp8,fp8,0,0.24734399716059366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,128,0,1,float16,float16,0,0.24064000447591147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,128,0,1,fp8,fp8,0,0.21820267041524252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,128,0,1,float16,fp8,0,0.2417280077934265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,128,0,1,float16,float16,0,0.2403093377749125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,128,0,1,float16,fp8,0,0.24308266242345175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,128,0,1,fp8,fp8,0,0.22125333547592163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,128,0,1,float16,float16,0,0.24446932474772134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,128,0,1,float16,fp8,0,0.2468000054359436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,128,0,1,fp8,fp8,0,0.2246133287747701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,128,0,1,float16,float16,0,0.25148799022038776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,128,0,1,float16,fp8,0,0.2552853425343831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,128,0,1,fp8,fp8,0,0.22801067431767783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,128,0,1,float16,float16,0,0.1534773310025533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,128,0,1,float16,fp8,0,0.15851733088493347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,128,0,1,fp8,fp8,0,0.1436906655629476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,128,0,1,float16,float16,0,0.13275733590126038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,128,0,1,float16,fp8,0,0.132314662138621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,128,0,1,fp8,fp8,0,0.12036266922950745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,128,0,1,float16,float16,0,0.13480533162752786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,128,0,1,float16,fp8,0,0.13595199584960938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,128,0,1,fp8,fp8,0,0.12181333700815837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,128,0,1,float16,float16,0,0.1365173359711965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,128,0,1,float16,fp8,0,0.13740266362826029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,128,0,1,fp8,fp8,0,0.12590400377909342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,128,0,1,float16,float16,0,0.13953600327173868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,128,0,1,float16,fp8,0,0.142767995595932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,128,0,1,fp8,fp8,0,0.1317813297112783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,128,0,1,float16,float16,0,0.09089600046475728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,128,0,1,float16,fp8,0,0.09422399600346883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,128,0,1,fp8,fp8,0,0.09126933415730794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,128,0,1,float16,float16,0,0.0853760043780009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,128,0,1,float16,fp8,0,0.08499200145403545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,128,0,1,fp8,fp8,0,0.07868266602357228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,128,0,1,float16,float16,0,0.08532800277074178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,128,0,1,float16,fp8,0,0.08543466528256734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,128,0,1,fp8,fp8,0,0.07888533174991608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,128,0,1,float16,float16,0,0.08548266688982646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,128,0,1,float16,fp8,0,0.08590933680534363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,128,0,1,fp8,fp8,0,0.07864533364772797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,128,0,1,float16,float16,0,0.08703466256459554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,128,0,1,float16,fp8,0,0.08737066388130188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,128,0,1,fp8,fp8,0,0.08125866452852885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,128,0,1,float16,float16,0,0.05842666824658712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,128,0,1,float16,fp8,0,0.059994667768478394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,128,0,1,fp8,fp8,0,0.0583840012550354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,128,0,1,float16,float16,0,0.05795200169086456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,128,0,1,float16,fp8,0,0.058149332801500954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,128,0,1,fp8,fp8,0,0.05380799869696299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,128,0,1,float16,fp8,0,0.0572213331858317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,128,0,1,float16,float16,0,0.05618133147557577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,128,0,1,fp8,fp8,0,0.054373333851496376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,128,0,1,float16,float16,0,0.05693333347638448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,128,0,1,float16,fp8,0,0.05787733197212219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,128,0,1,fp8,fp8,0,0.05607999861240387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,128,0,1,float16,float16,0,0.05815466741720835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,128,0,1,float16,fp8,0,0.058245331048965454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,128,0,1,fp8,fp8,0,0.05605866511662801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,128,0,1,fp8,fp8,0,1.6713066101074219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,128,0,1,float16,float16,0,1.7492373784383137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,128,0,1,float16,fp8,0,1.7517654101053874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,128,0,1,float16,float16,0,1.790986696879069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,128,0,1,float16,fp8,0,1.810640017191569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,128,0,1,fp8,fp8,0,1.8189493815104167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,128,0,1,float16,float16,0,1.802127997080485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,128,0,1,float16,fp8,0,1.8226399421691895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,128,0,1,fp8,fp8,0,1.8331999778747559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,128,0,1,float16,float16,0,1.038149356842041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,128,0,1,float16,fp8,0,1.9224692980448406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,128,0,1,float16,float16,0,1.9365653991699219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,128,0,1,fp8,fp8,0,1.8645226160685222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,128,0,1,float16,fp8,0,1.0207253297170003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,128,0,1,float16,float16,0,0.8930026690165201
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,128,0,1,fp8,fp8,0,1.0061386426289876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,128,0,1,float16,fp8,0,0.8947733243306478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,128,0,1,fp8,fp8,0,0.9308640162150065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,128,0,1,float16,float16,0,0.9085280100504557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,128,0,1,float16,fp8,0,0.9084213574727377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,128,0,1,fp8,fp8,0,0.8657386302947998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,128,0,1,float16,float16,0,0.926367998123169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,128,0,1,float16,fp8,0,0.9206506411234537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,128,0,1,fp8,fp8,0,0.8819146951039633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,128,0,1,float16,float16,0,0.9475626945495605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,128,0,1,float16,fp8,0,0.9467893441518148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,128,0,1,float16,float16,0,0.5360373258590698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,128,0,1,float16,fp8,0,0.5231786568959554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,128,0,1,fp8,fp8,0,0.5149600108464559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,128,0,1,fp8,fp8,0,0.9335573514302572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,128,0,1,float16,float16,0,0.46260801951090497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,128,0,1,float16,fp8,0,0.46116801102956134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,128,0,1,fp8,fp8,0,0.43825066089630127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,128,0,1,float16,float16,0,0.4681386550267537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,128,0,1,float16,fp8,0,0.47098668416341144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,128,0,1,fp8,fp8,0,0.4404533306757609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,128,0,1,float16,float16,0,0.47446401913960773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,128,0,1,float16,fp8,0,0.476421316464742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,128,0,1,fp8,fp8,0,0.4517706632614136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,128,0,1,float16,fp8,0,0.4826666514078776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,128,0,1,float16,float16,0,0.4869706630706787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,128,0,1,fp8,fp8,0,0.4645013411839803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,128,0,1,float16,float16,0,0.28494399785995483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,128,0,1,float16,fp8,0,0.27666133642196655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,128,0,1,float16,float16,0,0.2445440093676249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,128,0,1,fp8,fp8,0,0.27157866954803467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,128,0,1,float16,fp8,0,0.24399999777475992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,128,0,1,fp8,fp8,0,0.2288480003674825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,128,0,1,float16,float16,0,0.24952532847722372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,128,0,1,float16,fp8,0,0.24775999784469604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,128,0,1,fp8,fp8,0,0.2330026626586914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,128,0,1,float16,float16,0,0.25116799275080365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,128,0,1,float16,fp8,0,0.2507839997609456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,128,0,1,fp8,fp8,0,0.23723200956980386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,128,0,1,float16,float16,0,0.25940799713134766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,128,0,1,float16,float16,0,0.15552000204722086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,128,0,1,float16,fp8,0,0.25654399394989014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,128,0,1,fp8,fp8,0,0.24465600649515787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,128,0,1,float16,fp8,0,0.1525759994983673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,128,0,1,fp8,fp8,0,0.1493280033270518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,128,0,1,float16,float16,0,0.13212266564369202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,128,0,1,float16,fp8,0,0.13196800152460733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,128,0,1,fp8,fp8,0,0.12242133418718974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,128,0,1,float16,float16,0,0.13683733344078064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,128,0,1,float16,fp8,0,0.13460266590118408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,128,0,1,fp8,fp8,0,0.1277653376261393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,128,0,1,float16,float16,0,0.13505066434542337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,128,0,1,fp8,fp8,0,0.13106667002042136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,128,0,1,float16,fp8,0,0.13737600048383078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,128,0,1,float16,float16,0,0.139984001715978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,128,0,1,float16,fp8,0,0.13893333077430725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,128,0,1,fp8,fp8,0,0.13742400209108988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,128,0,1,float16,fp8,0,0.08843732873598735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,128,0,1,fp8,fp8,0,0.09054399530092876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,128,0,1,float16,float16,0,0.0909493366877238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,128,0,1,float16,float16,0,0.07962666451931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,128,0,1,float16,fp8,0,0.07865066826343536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,128,0,1,float16,float16,0,0.07894933223724365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,128,0,1,fp8,fp8,0,0.07238933444023132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,128,0,1,float16,fp8,0,0.07970133423805237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,128,0,1,fp8,fp8,0,0.07063466807206471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,128,0,1,float16,float16,0,0.07914666831493378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,128,0,1,float16,fp8,0,0.07924266656239827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,128,0,1,fp8,fp8,0,0.07237866520881653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,128,0,1,float16,float16,0,0.07891199986139934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,128,0,1,float16,fp8,0,0.07969599962234497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,128,0,1,fp8,fp8,0,0.07448533177375793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,128,0,1,float16,float16,0,0.05431999762852987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,128,0,1,float16,fp8,0,0.05380799869696299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,128,0,1,fp8,fp8,0,0.05183466772238413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,128,0,1,float16,float16,0,0.054085334142049156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,128,0,1,float16,fp8,0,0.052111998200416565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,128,0,1,fp8,fp8,0,0.04887466629346212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,128,0,1,float16,float16,0,0.054229333996772766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,128,0,1,float16,fp8,0,0.05298133194446564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,128,0,1,fp8,fp8,0,0.04879466692606608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,128,0,1,float16,float16,0,0.05376533170541128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,128,0,1,float16,fp8,0,0.05354666709899902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,128,0,1,fp8,fp8,0,0.04900266726811727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,128,0,1,float16,float16,0,0.053786665201187134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,128,0,1,float16,fp8,0,0.05222400029500326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,128,0,1,fp8,fp8,0,0.04801600178082784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,128,0,1,float16,float16,0,0.03761066744724909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,128,0,1,float16,fp8,0,0.037418665985266365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,128,0,1,fp8,fp8,0,0.03572266548871994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,128,0,1,float16,float16,0,0.03562133262554804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,128,0,1,float16,fp8,0,0.03748266647259394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,128,0,1,fp8,fp8,0,0.033557333052158356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,128,0,1,float16,float16,0,0.036618667344252266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,128,0,1,float16,fp8,0,0.037530665596326195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,128,0,1,fp8,fp8,0,0.0336053321758906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,128,0,1,float16,float16,0,0.0378506655494372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,128,0,1,float16,fp8,0,0.03783999880154928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,128,0,1,fp8,fp8,0,0.035599999129772186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,128,0,1,float16,float16,0,0.0359199990828832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,128,0,1,float16,fp8,0,0.037477334340413414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,128,0,1,fp8,fp8,0,0.03610666592915853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,128,0,1,float16,float16,0,1.4900479316711426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,128,0,1,fp8,fp8,0,1.4411147435506184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,128,0,1,float16,fp8,0,1.4870932896931965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,128,0,1,float16,float16,0,1.5538613001505535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,128,0,1,float16,fp8,0,1.5631999969482422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,128,0,1,fp8,fp8,0,1.5864639282226562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,128,0,1,float16,float16,0,1.5793066024780273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,128,0,1,float16,fp8,0,1.5986720720926921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,128,0,1,fp8,fp8,0,1.6089173952738445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,128,0,1,float16,float16,0,0.9009599685668945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,128,0,1,float16,float16,0,1.6644266446431477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,128,0,1,float16,fp8,0,1.6593653361002605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,128,0,1,fp8,fp8,0,1.6279093424479167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,128,0,1,float16,fp8,0,0.8857226371765137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,128,0,1,fp8,fp8,0,0.8900852998097738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,128,0,1,float16,float16,0,0.7603413263956705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,128,0,1,float16,fp8,0,0.758949359258016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,128,0,1,fp8,fp8,0,0.7462026278177897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,128,0,1,float16,float16,0,0.7757226626078287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,128,0,1,float16,fp8,0,0.7798986434936523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,128,0,1,fp8,fp8,0,0.7431786855061849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,128,0,1,float16,float16,0,0.7890506585439047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,128,0,1,float16,fp8,0,0.7848479747772217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,128,0,1,fp8,fp8,0,0.7746880054473877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,128,0,1,float16,float16,0,0.8216479619344076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,128,0,1,float16,fp8,0,0.8103893597920736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,128,0,1,float16,float16,0,0.46395734945933026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,128,0,1,float16,fp8,0,0.45260798931121826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,128,0,1,fp8,fp8,0,0.8203306992848715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,128,0,1,fp8,fp8,0,0.4551626841227214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,128,0,1,float16,float16,0,0.39417600631713867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,128,0,1,float16,fp8,0,0.39185067017873126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,128,0,1,fp8,fp8,0,0.3770773410797119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,128,0,1,float16,float16,0,0.40051201979319256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,128,0,1,float16,fp8,0,0.40143465995788574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,128,0,1,fp8,fp8,0,0.3810400168100993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,128,0,1,float16,float16,0,0.4049439827601115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,128,0,1,float16,fp8,0,0.4043999910354614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,128,0,1,fp8,fp8,0,0.390938679377238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,128,0,1,float16,float16,0,0.4197760025660197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,128,0,1,float16,fp8,0,0.41678933302561444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,128,0,1,float16,float16,0,0.24919466177622476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,128,0,1,float16,fp8,0,0.23990933100382486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,128,0,1,fp8,fp8,0,0.40349864959716797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,128,0,1,fp8,fp8,0,0.2411200006802877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,128,0,1,float16,float16,0,0.20829866329828897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,128,0,1,float16,fp8,0,0.20834134022394815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,128,0,1,fp8,fp8,0,0.1939093271891276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,128,0,1,float16,fp8,0,0.21040532986323038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,128,0,1,float16,float16,0,0.21130132675170898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,128,0,1,fp8,fp8,0,0.2021440068880717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,128,0,1,float16,float16,0,0.2123039960861206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,128,0,1,float16,fp8,0,0.21345599492390951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,128,0,1,fp8,fp8,0,0.20518932739893594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,128,0,1,float16,fp8,0,0.21758933862050375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,128,0,1,float16,float16,0,0.220085342725118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,128,0,1,float16,float16,0,0.13406933347384134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,128,0,1,fp8,fp8,0,0.21384000778198242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,128,0,1,float16,fp8,0,0.1306880017121633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,128,0,1,fp8,fp8,0,0.1311253309249878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,128,0,1,float16,float16,0,0.11071466406186421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,128,0,1,fp8,fp8,0,0.10528533657391866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,128,0,1,float16,fp8,0,0.11236266295115153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,128,0,1,float16,float16,0,0.11369599898656209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,128,0,1,float16,fp8,0,0.113045334815979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,128,0,1,fp8,fp8,0,0.10948266585667928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,128,0,1,float16,fp8,0,0.11616533001263936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,128,0,1,float16,float16,0,0.11441066861152649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,128,0,1,fp8,fp8,0,0.11319999893506368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,128,0,1,float16,float16,0,0.12125866611798604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,128,0,1,float16,fp8,0,0.11941867073376973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,128,0,1,fp8,fp8,0,0.11803733309110005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,128,0,1,float16,float16,0,0.07789333164691925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,128,0,1,float16,fp8,0,0.07592533528804779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,128,0,1,fp8,fp8,0,0.07809600234031677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,128,0,1,float16,float16,0,0.06619200110435486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,128,0,1,float16,fp8,0,0.06628799935181935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,128,0,1,fp8,fp8,0,0.0602453351020813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,128,0,1,float16,float16,0,0.06620266536871593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,128,0,1,float16,fp8,0,0.0664106657107671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,128,0,1,fp8,fp8,0,0.06066666543483734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,128,0,1,float16,float16,0,0.06646933158238728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,128,0,1,float16,fp8,0,0.06632000207901001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,128,0,1,fp8,fp8,0,0.06058666606744131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,128,0,1,float16,float16,0,0.06850666801134746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,128,0,1,float16,fp8,0,0.0670773337284724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,128,0,1,fp8,fp8,0,0.06493866443634033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,128,0,1,float16,float16,0,0.04636266827583313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,128,0,1,float16,fp8,0,0.04665599763393402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,128,0,1,fp8,fp8,0,0.04422399898370107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,128,0,1,float16,float16,0,0.043807998299598694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,128,0,1,float16,fp8,0,0.04567466676235199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,128,0,1,fp8,fp8,0,0.04166933397452036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,128,0,1,float16,float16,0,0.04374399781227112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,128,0,1,float16,fp8,0,0.04398400088151296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,128,0,1,fp8,fp8,0,0.04195733368396759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,128,0,1,float16,float16,0,0.04568000137805939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,128,0,1,float16,fp8,0,0.04384533564249674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,128,0,1,fp8,fp8,0,0.0414986660083135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,128,0,1,float16,float16,0,0.045824001232783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,128,0,1,float16,fp8,0,0.045909335215886436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,128,0,1,fp8,fp8,0,0.041562666495641075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,128,0,1,float16,float16,0,0.03133333226044973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,128,0,1,float16,fp8,0,0.031632001201311745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,128,0,1,fp8,fp8,0,0.031632001201311745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,128,0,1,float16,float16,0,0.03145600110292435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,128,0,1,float16,fp8,0,0.031290667752424874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,128,0,1,fp8,fp8,0,0.029301332930723827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,128,0,1,float16,float16,0,0.02937600016593933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,128,0,1,float16,fp8,0,0.03126399964094162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,128,0,1,fp8,fp8,0,0.029120000700155895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,128,0,1,float16,float16,0,0.029504001140594482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,128,0,1,float16,fp8,0,0.03175999969244003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,128,0,1,fp8,fp8,0,0.02951466788848241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,128,0,1,float16,float16,0,0.03089066594839096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,128,0,1,float16,fp8,0,0.0315786674618721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,128,0,1,fp8,fp8,0,0.02959466725587845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,128,0,1,float16,float16,0,0.02741333345572154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,128,0,1,float16,fp8,0,0.02957333376010259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,128,0,1,float16,float16,0,0.027221334477265675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,128,0,1,fp8,fp8,0,0.027093333502610523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,128,0,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,128,0,1,fp8,fp8,0,0.027189334233601887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,128,0,1,float16,float16,0,0.02759466568628947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,128,0,1,float16,fp8,0,0.028949332733949024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,128,0,1,fp8,fp8,0,0.027237333357334137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,128,0,1,float16,float16,0,0.028560000161329906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,128,0,1,float16,fp8,0,0.029258665939172108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,128,0,1,fp8,fp8,0,0.027552001178264618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,128,0,1,float16,float16,0,0.029311999678611755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,128,0,1,float16,fp8,0,0.029253333806991577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,128,0,1,fp8,fp8,0,0.0273333340883255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,128,0,1,float16,fp8,0,0.6782986323038737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,128,0,1,float16,float16,0,0.678266684214274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,128,0,1,fp8,fp8,0,0.6679786841074625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,128,0,1,float16,float16,0,0.6915253003438314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,128,0,1,float16,fp8,0,0.6966293652852377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,128,0,1,fp8,fp8,0,0.682032028834025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,128,0,1,float16,fp8,0,0.6993066469828287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,128,0,1,float16,float16,0,0.702351967493693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,128,0,1,fp8,fp8,0,0.6978507041931152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,128,0,1,float16,float16,0,0.4235893487930298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,128,0,1,float16,fp8,0,0.730522632598877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,128,0,1,fp8,fp8,0,0.7662080128987631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,128,0,1,float16,float16,0,0.7373867034912109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,128,0,1,float16,fp8,0,0.4138186772664388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,128,0,1,float16,float16,0,0.3495466709136963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,128,0,1,fp8,fp8,0,0.4232693513234456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,128,0,1,float16,fp8,0,0.34862399101257324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,128,0,1,fp8,fp8,0,0.34348801771799725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,128,0,1,float16,float16,0,0.35889601707458496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,128,0,1,float16,fp8,0,0.3548693259557088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,128,0,1,fp8,fp8,0,0.34755198160807294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,128,0,1,float16,float16,0,0.360858678817749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,128,0,1,float16,fp8,0,0.3625760078430176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,128,0,1,fp8,fp8,0,0.35394132137298584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,128,0,1,float16,float16,0,0.3768426577250163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,128,0,1,float16,fp8,0,0.3721706469853719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,128,0,1,float16,float16,0,0.22497065862019858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,128,0,1,float16,fp8,0,0.21997332572937012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,128,0,1,fp8,fp8,0,0.37291733423868817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,128,0,1,fp8,fp8,0,0.22387200593948364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,128,0,1,float16,float16,0,0.18699200948079428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,128,0,1,fp8,fp8,0,0.17761067549387613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,128,0,1,float16,float16,0,0.18989866971969604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,128,0,1,float16,fp8,0,0.18801599740982056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,128,0,1,float16,fp8,0,0.18921067317326865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,128,0,1,fp8,fp8,0,0.18421334028244019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,128,0,1,float16,float16,0,0.19365866978963217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,128,0,1,float16,fp8,0,0.19313599665959677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,128,0,1,fp8,fp8,0,0.1885653336842855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,128,0,1,float16,float16,0,0.19967466592788696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,128,0,1,float16,fp8,0,0.19988266626993814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,128,0,1,fp8,fp8,0,0.195743997891744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,128,0,1,float16,float16,0,0.12522666652997336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,128,0,1,float16,fp8,0,0.12288533647855122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,128,0,1,fp8,fp8,0,0.12309867143630981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,128,0,1,float16,float16,0,0.10272533694903056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,128,0,1,float16,fp8,0,0.10442133744557698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,128,0,1,fp8,fp8,0,0.09570667147636414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,128,0,1,float16,float16,0,0.10355732838312785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,128,0,1,float16,fp8,0,0.10514133175214131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,128,0,1,fp8,fp8,0,0.10043199857076009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,128,0,1,float16,float16,0,0.10629866520563762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,128,0,1,float16,fp8,0,0.10545066992441814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,128,0,1,fp8,fp8,0,0.10468266407648723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,128,0,1,float16,float16,0,0.11114133397738139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,128,0,1,float16,fp8,0,0.11173866192499797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,128,0,1,fp8,fp8,0,0.11013866464296977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,128,0,1,float16,float16,0,0.06963733335336049
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,128,0,1,float16,fp8,0,0.0687253326177597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,128,0,1,fp8,fp8,0,0.07239999870459239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,128,0,1,float16,float16,0,0.06030400097370148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,128,0,1,float16,fp8,0,0.06052800019582113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,128,0,1,fp8,fp8,0,0.05535466472307841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,128,0,1,float16,float16,0,0.06000000238418579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,128,0,1,float16,fp8,0,0.060090666015942894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,128,0,1,fp8,fp8,0,0.056421334544817604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,128,0,1,float16,float16,0,0.06150933106740316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,128,0,1,float16,fp8,0,0.06010666489601135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,128,0,1,fp8,fp8,0,0.056501333912213646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,128,0,1,float16,float16,0,0.06243200103441874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,128,0,1,float16,fp8,0,0.06126399834950765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,128,0,1,float16,float16,0,0.04348800083001455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,128,0,1,fp8,fp8,0,0.060378665725390114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,128,0,1,float16,fp8,0,0.04379733403523763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,128,0,1,fp8,fp8,0,0.04095466683308283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,128,0,1,float16,float16,0,0.04181866844495138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,128,0,1,float16,fp8,0,0.041690667470296226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,128,0,1,fp8,fp8,0,0.03757333258787791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,128,0,1,float16,float16,0,0.04275199770927429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,128,0,1,float16,fp8,0,0.04190400242805481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,128,0,1,fp8,fp8,0,0.037690666814645134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,128,0,1,float16,float16,0,0.04153066625197729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,128,0,1,float16,fp8,0,0.041663999358812966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,128,0,1,fp8,fp8,0,0.03886933376391729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,128,0,1,float16,float16,0,0.04106666644414266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,128,0,1,float16,fp8,0,0.04188266893227895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,128,0,1,float16,float16,0,0.02959466725587845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,128,0,1,fp8,fp8,0,0.03939733405907949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,128,0,1,float16,fp8,0,0.029552000264326733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,128,0,1,fp8,fp8,0,0.02792533238728841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,128,0,1,float16,float16,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,128,0,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,128,0,1,float16,float16,0,0.02752000093460083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,128,0,1,float16,fp8,0,0.02773333340883255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,128,0,1,fp8,fp8,0,0.027402666707833607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,128,0,1,fp8,fp8,0,0.026047999660174053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,128,0,1,float16,float16,0,0.027813332776228588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,128,0,1,float16,fp8,0,0.02959466725587845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,128,0,1,fp8,fp8,0,0.027482666075229645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,128,0,1,float16,float16,0,0.02739199995994568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,128,0,1,float16,fp8,0,0.027664000789324444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,128,0,1,fp8,fp8,0,0.02938133229811986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,128,0,1,float16,fp8,0,0.025472000241279602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,128,0,1,float16,float16,0,0.025685332715511322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,128,0,1,fp8,fp8,0,0.025455998877684276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,128,0,1,float16,float16,0,0.025199999411900837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,128,0,1,float16,fp8,0,0.025221332907676697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,128,0,1,fp8,fp8,0,0.023562667270501454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,128,0,1,float16,float16,0,0.025434667865435284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,128,0,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,128,0,1,fp8,fp8,0,0.02333866556485494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,128,0,1,float16,float16,0,0.02536533276240031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,128,0,1,float16,fp8,0,0.025562666356563568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,128,0,1,fp8,fp8,0,0.025034666061401367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,128,0,1,float16,float16,0,0.02550933261712392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,128,0,1,float16,fp8,0,0.02659733345111211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,128,0,1,fp8,fp8,0,0.025466665625572205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,128,0,1,float16,float16,0,0.023050665855407715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,128,0,1,float16,fp8,0,0.023157333334287006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,128,0,1,fp8,fp8,0,0.021514666577180225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,128,0,1,float16,float16,0,0.021525333325068157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,128,0,1,float16,fp8,0,0.02239466706911723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,128,0,1,fp8,fp8,0,0.02144533395767212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,128,0,1,float16,float16,0,0.023242667317390442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,128,0,1,float16,fp8,0,0.022319999833901722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,128,0,1,fp8,fp8,0,0.02093333254257838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,128,0,1,float16,float16,0,0.02332266668478648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,128,0,1,float16,fp8,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,128,0,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,128,0,1,float16,fp8,0,0.023311999936898548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,128,0,1,float16,float16,0,0.02330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,128,0,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,128,0,1,float16,float16,0,0.37510399023691815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,128,0,1,float16,fp8,0,0.3752479950586955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,128,0,1,float16,float16,0,0.3794879913330078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,128,0,1,fp8,fp8,0,0.3721439838409424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,128,0,1,float16,fp8,0,0.38239999612172443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,128,0,1,fp8,fp8,0,0.3776106834411621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,128,0,1,float16,float16,0,0.38973867893218994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,128,0,1,float16,fp8,0,0.3870613177617391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,128,0,1,fp8,fp8,0,0.3837920029958089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,128,0,1,float16,float16,0,0.39774401982625324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,128,0,1,float16,fp8,0,0.3978506724039714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,128,0,1,float16,float16,0,0.23373333613077799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,128,0,1,fp8,fp8,0,0.40607468287150067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,128,0,1,fp8,fp8,0,0.23498133818308511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,128,0,1,float16,fp8,0,0.23029865821202597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,128,0,1,float16,float16,0,0.19840532541275024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,128,0,1,float16,fp8,0,0.19802133242289224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,128,0,1,fp8,fp8,0,0.19207467635472616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,128,0,1,float16,float16,0,0.20132799943288168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,128,0,1,float16,fp8,0,0.20149866739908853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,128,0,1,fp8,fp8,0,0.19882667064666748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,128,0,1,float16,float16,0,0.20373332500457764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,128,0,1,float16,fp8,0,0.20147732893625894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,128,0,1,fp8,fp8,0,0.19897600015004477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,128,0,1,float16,float16,0,0.20943999290466309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,128,0,1,float16,fp8,0,0.20722667376200357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,128,0,1,float16,float16,0,0.12590400377909342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,128,0,1,fp8,fp8,0,0.2084640065828959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,128,0,1,float16,fp8,0,0.12426666418711345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,128,0,1,fp8,fp8,0,0.12844799955685934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,128,0,1,float16,float16,0,0.10788266857465108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,128,0,1,float16,fp8,0,0.10886399944623311
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,128,0,1,fp8,fp8,0,0.10211732983589172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,128,0,1,float16,float16,0,0.10961066683133443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,128,0,1,float16,fp8,0,0.10982400178909302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,128,0,1,fp8,fp8,0,0.10674132903416951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,128,0,1,float16,float16,0,0.10966400305430095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,128,0,1,float16,fp8,0,0.11043199896812439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,128,0,1,fp8,fp8,0,0.11000000437100728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,128,0,1,float16,float16,0,0.11351466178894043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,128,0,1,float16,fp8,0,0.11281067132949829
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,128,0,1,float16,float16,0,0.070933332045873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,128,0,1,fp8,fp8,0,0.11574932932853699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,128,0,1,float16,fp8,0,0.07037866612275441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,128,0,1,fp8,fp8,0,0.07459733386834462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,128,0,1,float16,float16,0,0.06187733511130015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,128,0,1,float16,fp8,0,0.06241066753864288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,128,0,1,fp8,fp8,0,0.05860800047715505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,128,0,1,float16,float16,0,0.06257066627343495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,128,0,1,float16,fp8,0,0.062133332093556724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,128,0,1,fp8,fp8,0,0.0584746648867925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,128,0,1,float16,float16,0,0.06427200138568878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,128,0,1,float16,fp8,0,0.0633546660343806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,128,0,1,fp8,fp8,0,0.05956799785296122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,128,0,1,float16,float16,0,0.06472533444563548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,128,0,1,float16,fp8,0,0.06454400221506755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,128,0,1,fp8,fp8,0,0.06239999830722809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,128,0,1,float16,float16,0,0.04186133543650309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,128,0,1,float16,fp8,0,0.04167999823888143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,128,0,1,fp8,fp8,0,0.03958933303753535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,128,0,1,float16,float16,0,0.03976000100374222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,128,0,1,float16,fp8,0,0.0395413339138031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,128,0,1,fp8,fp8,0,0.03600533306598663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,128,0,1,float16,float16,0,0.03945599993069967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,128,0,1,float16,fp8,0,0.03946666667858759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,128,0,1,fp8,fp8,0,0.03642666588226954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,128,0,1,float16,float16,0,0.03982933362325033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,128,0,1,float16,fp8,0,0.039674667020638786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,128,0,1,fp8,fp8,0,0.037658666570981346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,128,0,1,float16,float16,0,0.039642666776975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,128,0,1,float16,fp8,0,0.03947199881076813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,128,0,1,float16,float16,0,0.029322666426499683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,128,0,1,fp8,fp8,0,0.0376800000667572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,128,0,1,float16,fp8,0,0.03028800090154012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,128,0,1,fp8,fp8,0,0.029535998900731403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,128,0,1,float16,float16,0,0.029253333806991577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,128,0,1,float16,fp8,0,0.029290666182835896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,128,0,1,float16,float16,0,0.02759466568628947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,128,0,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,128,0,1,fp8,fp8,0,0.027615999182065327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,128,0,1,fp8,fp8,0,0.028165332973003387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,128,0,1,float16,float16,0,0.02754133443037669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,128,0,1,float16,fp8,0,0.02942933390537898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,128,0,1,fp8,fp8,0,0.029264000554879505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,128,0,1,float16,float16,0,0.03010133405526479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,128,0,1,float16,fp8,0,0.02943466603755951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,128,0,1,float16,float16,0,0.02348266790310542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,128,0,1,fp8,fp8,0,0.029274667302767437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,128,0,1,float16,fp8,0,0.02201066662867864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,128,0,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,128,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,128,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,128,0,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,128,0,1,float16,float16,0,0.02145066608985265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,128,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,128,0,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,128,0,1,float16,float16,0,0.02176533391078313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,128,0,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,128,0,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,128,0,1,float16,float16,0,0.023082666099071503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,128,0,1,float16,fp8,0,0.030245333909988403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,128,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,128,0,1,fp8,fp8,0,0.022730665902296703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,128,0,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,128,0,1,float16,float16,0,0.019013332823912304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,128,0,1,float16,fp8,0,0.018757333358128864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,128,0,1,fp8,fp8,0,0.019482667247454327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,128,0,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,128,0,1,fp8,fp8,0,0.01786133274435997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,128,0,1,float16,float16,0,0.0174346665541331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,128,0,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,128,0,1,fp8,fp8,0,0.017840000490347546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,128,0,1,float16,float16,0,0.018624000251293182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,128,0,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,128,0,1,fp8,fp8,0,0.017717332889636356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,128,0,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,128,0,1,fp8,fp8,0,0.017717332889636356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,128,0,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,128,0,1,float16,float16,0,0.0194560003777345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,128,0,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,128,0,1,float16,float16,0,0.018933333456516266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,128,0,1,float16,fp8,0,0.01952533299724261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,128,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,128,0,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,128,0,1,fp8,fp8,0,0.017488000293572743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,128,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,128,0,1,float16,fp8,0,0.01950399950146675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,128,0,1,fp8,fp8,0,0.01811733345190684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,128,0,1,float16,float16,0,0.24844266970952353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,128,0,1,fp8,fp8,0,0.23865600426991782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,128,0,1,float16,fp8,0,0.24754667282104492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,128,0,1,float16,float16,0,0.25271467367808026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,128,0,1,float16,fp8,0,0.2505493362744649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,128,0,1,fp8,fp8,0,0.24490133921305338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,128,0,1,float16,float16,0,0.25147199630737305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,128,0,1,float16,fp8,0,0.2516319950421651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,128,0,1,fp8,fp8,0,0.2475200096766154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,128,0,1,float16,float16,0,0.2592586676279704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,128,0,1,float16,fp8,0,0.2566986680030823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,128,0,1,fp8,fp8,0,0.2564693291982015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,128,0,1,float16,float16,0,0.15028267105420431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,128,0,1,float16,fp8,0,0.14775466918945312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,128,0,1,fp8,fp8,0,0.15244799852371216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,128,0,1,float16,float16,0,0.13405332962671915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,128,0,1,float16,fp8,0,0.1320746640364329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,128,0,1,fp8,fp8,0,0.12644267082214355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,128,0,1,float16,float16,0,0.13571733236312866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,128,0,1,float16,fp8,0,0.1327786644299825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,128,0,1,fp8,fp8,0,0.13004799683888754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,128,0,1,float16,float16,0,0.1358026663462321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,128,0,1,float16,fp8,0,0.13483200470606485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,128,0,1,fp8,fp8,0,0.13173866271972656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,128,0,1,float16,float16,0,0.13852266470591226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,128,0,1,float16,fp8,0,0.1378720005353292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,128,0,1,float16,float16,0,0.08433066805203755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,128,0,1,fp8,fp8,0,0.13838932911554971
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,128,0,1,float16,fp8,0,0.08268799881140391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,128,0,1,fp8,fp8,0,0.08560533324877422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,128,0,1,float16,float16,0,0.0747680018345515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,128,0,1,float16,fp8,0,0.07487999896208446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,128,0,1,fp8,fp8,0,0.0689279983441035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,128,0,1,float16,float16,0,0.07551999886830647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,128,0,1,float16,fp8,0,0.07427200178305308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,128,0,1,fp8,fp8,0,0.0690773328145345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,128,0,1,float16,fp8,0,0.07428800066312154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,128,0,1,float16,float16,0,0.07527466615041097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,128,0,1,fp8,fp8,0,0.06992533306280772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,128,0,1,float16,float16,0,0.0763733337322871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,128,0,1,float16,fp8,0,0.07493333518505096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,128,0,1,fp8,fp8,0,0.07423999905586243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,128,0,1,float16,float16,0,0.04789333542188009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,128,0,1,float16,fp8,0,0.04753600060939789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,128,0,1,fp8,fp8,0,0.047055999437967934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,128,0,1,float16,float16,0,0.04572799801826477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,128,0,1,float16,fp8,0,0.04560000201066335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,128,0,1,fp8,fp8,0,0.043365334471066795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,128,0,1,fp8,fp8,0,0.04358933369318644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,128,0,1,float16,fp8,0,0.04555733501911163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,128,0,1,float16,float16,0,0.04597333570321401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,128,0,1,float16,float16,0,0.04642133414745331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,128,0,1,fp8,fp8,0,0.04278933505217234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,128,0,1,float16,fp8,0,0.04561600089073181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,128,0,1,float16,float16,0,0.0469706654548645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,128,0,1,float16,fp8,0,0.04643199841181437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,128,0,1,float16,float16,0,0.03163733333349228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,128,0,1,fp8,fp8,0,0.043562665581703186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,128,0,1,float16,fp8,0,0.03150933235883713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,128,0,1,fp8,fp8,0,0.03158933420976003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,128,0,1,float16,float16,0,0.02977599948644638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,128,0,1,float16,fp8,0,0.031370667119820915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,128,0,1,fp8,fp8,0,0.029333333174387615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,128,0,1,float16,fp8,0,0.03145066648721695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,128,0,1,fp8,fp8,0,0.02956799914439519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,128,0,1,float16,float16,0,0.03181866556406021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,128,0,1,float16,float16,0,0.03178133318821589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,128,0,1,float16,fp8,0,0.03126933425664902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,128,0,1,fp8,fp8,0,0.029546665648619335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,128,0,1,float16,float16,0,0.029738667110602062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,128,0,1,float16,fp8,0,0.03194666653871536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,128,0,1,float16,float16,0,0.02474133421977361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,128,0,1,fp8,fp8,0,0.029669334491093952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,128,0,1,float16,fp8,0,0.02430933217207591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,128,0,1,fp8,fp8,0,0.02608533451954524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,128,0,1,float16,float16,0,0.02378133436044057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,128,0,1,fp8,fp8,0,0.02325333406527837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,128,0,1,float16,float16,0,0.023552000522613525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,128,0,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,128,0,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,128,0,1,float16,float16,0,0.0233599990606308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,128,0,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,128,0,1,fp8,fp8,0,0.0234400009115537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,128,0,1,float16,float16,0,0.024112001061439514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,128,0,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,128,0,1,fp8,fp8,0,0.025279998779296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,128,0,1,float16,float16,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,128,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,128,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,128,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,128,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,128,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,128,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,128,0,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,128,0,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,128,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,128,0,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,128,0,1,float16,float16,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,128,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,128,0,1,float16,float16,0,0.017386666188637417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,128,0,1,float16,fp8,0,0.018709332992633183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,128,0,1,float16,float16,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,128,0,1,float16,float16,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,128,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,128,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,128,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,128,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,128,0,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,128,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,128,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,128,0,1,float16,float16,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,128,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,128,0,1,float16,float16,0,0.017525333911180496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,128,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,128,0,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,128,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,128,0,1,float16,float16,0,0.1835520068804423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,128,0,1,float16,fp8,0,0.18380266427993774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,128,0,1,fp8,fp8,0,0.17299199104309082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,128,0,1,float16,float16,0,0.183786670366923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,128,0,1,float16,fp8,0,0.18480533361434937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,128,0,1,fp8,fp8,0,0.17933867375055948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,128,0,1,float16,float16,0,0.18437333901723227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,128,0,1,float16,fp8,0,0.1851466695467631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,128,0,1,fp8,fp8,0,0.18126932779947916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,128,0,1,float16,float16,0,0.18872533241907755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,128,0,1,float16,fp8,0,0.18733332554499307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,128,0,1,fp8,fp8,0,0.18661866585413614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,128,0,1,float16,float16,0,0.1088053286075592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,128,0,1,float16,fp8,0,0.10809600353240967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,128,0,1,fp8,fp8,0,0.10946666200955708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,128,0,1,float16,float16,0,0.10120532910029094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,128,0,1,float16,fp8,0,0.1006773312886556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,128,0,1,fp8,fp8,0,0.09309867024421692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,128,0,1,float16,float16,0,0.10076799988746643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,128,0,1,float16,fp8,0,0.09963200489679973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,128,0,1,fp8,fp8,0,0.09331199526786804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,128,0,1,float16,float16,0,0.09980799754460652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,128,0,1,float16,fp8,0,0.09985066453615825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,128,0,1,float16,float16,0,0.10128532846768697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,128,0,1,fp8,fp8,0,0.0942026674747467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,128,0,1,float16,fp8,0,0.10088533163070679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,128,0,1,fp8,fp8,0,0.09809066851933797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,128,0,1,float16,float16,0,0.06001600126425425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,128,0,1,fp8,fp8,0,0.058415999015172325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,128,0,1,float16,fp8,0,0.060864001512527466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,128,0,1,float16,float16,0,0.057855998476346336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,128,0,1,float16,fp8,0,0.05813866853713989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,128,0,1,fp8,fp8,0,0.054383998115857445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,128,0,1,float16,fp8,0,0.058149332801500954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,128,0,1,fp8,fp8,0,0.05605333546797434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,128,0,1,float16,float16,0,0.05846933523813883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,128,0,1,float16,float16,0,0.058362667759259544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,128,0,1,float16,fp8,0,0.05827199916044871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,128,0,1,fp8,fp8,0,0.055946667989095054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,128,0,1,float16,float16,0,0.05836800237496694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,128,0,1,float16,fp8,0,0.05842666824658712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,128,0,1,float16,float16,0,0.03773866593837738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,128,0,1,fp8,fp8,0,0.05570666491985321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,128,0,1,fp8,fp8,0,0.03739733248949051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,128,0,1,float16,fp8,0,0.038890667259693146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,128,0,1,float16,float16,0,0.03565866748491923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,128,0,1,float16,fp8,0,0.03615466753641764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,128,0,1,fp8,fp8,0,0.03619199991226196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,128,0,1,float16,float16,0,0.03721066564321518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,128,0,1,float16,fp8,0,0.03734933336575826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,128,0,1,float16,float16,0,0.03754133234421412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,128,0,1,fp8,fp8,0,0.03590933233499527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,128,0,1,float16,fp8,0,0.0379573330283165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,128,0,1,fp8,fp8,0,0.03736533224582672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,128,0,1,float16,float16,0,0.03788800040880839
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,128,0,1,float16,fp8,0,0.03818666686614355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,128,0,1,fp8,fp8,0,0.03770133356253306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,128,0,1,float16,float16,0,0.0278613343834877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,128,0,1,float16,fp8,0,0.027482666075229645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,128,0,1,fp8,fp8,0,0.027301333844661713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,128,0,1,float16,fp8,0,0.02537599951028824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,128,0,1,float16,float16,0,0.027722666660944622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,128,0,1,fp8,fp8,0,0.025274666647116344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,128,0,1,float16,float16,0,0.025237334271272022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,128,0,1,float16,fp8,0,0.027530667682488758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,128,0,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,128,0,1,float16,float16,0,0.025493333737055462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,128,0,1,float16,fp8,0,0.02777066578467687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,128,0,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,128,0,1,float16,float16,0,0.027162666122118633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,128,0,1,float16,fp8,0,0.02807466685771942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,128,0,1,fp8,fp8,0,0.02743999908367793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,128,0,1,float16,float16,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,128,0,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,128,0,1,float16,fp8,0,0.021322667598724365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,128,0,1,float16,float16,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,128,0,1,float16,fp8,0,0.019845332950353622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,128,0,1,fp8,fp8,0,0.019546666493018467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,128,0,1,float16,float16,0,0.021082667013009388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,128,0,1,float16,fp8,0,0.020090666910012562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,128,0,1,fp8,fp8,0,0.01952533299724261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,128,0,1,float16,float16,0,0.021295999487241108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,128,0,1,float16,fp8,0,0.019685332973798115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,128,0,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,128,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,128,0,1,float16,fp8,0,0.020768000433842342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,128,0,1,fp8,fp8,0,0.021274665991465252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,128,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,128,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,128,0,1,fp8,fp8,0,0.017765333255132038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,128,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,128,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,128,0,1,float16,float16,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,128,0,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,128,0,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,128,0,1,float16,float16,0,0.01563199982047081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,128,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,128,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,128,0,1,float16,float16,0,0.01573866605758667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,128,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,128,0,1,fp8,fp8,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,128,0,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,128,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,128,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,128,0,1,float16,float16,0,0.015423999478419622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,128,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,128,0,1,fp8,fp8,0,0.015935999651749928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,128,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,128,0,1,fp8,fp8,0,0.01581866666674614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,128,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,128,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,128,0,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,128,0,1,float16,float16,0,0.1530346671740214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,128,0,1,float16,fp8,0,0.1523413360118866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,128,0,1,fp8,fp8,0,0.14517866571744284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,128,0,1,float16,float16,0,0.1518826683362325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,128,0,1,float16,fp8,0,0.15061333775520325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,128,0,1,fp8,fp8,0,0.14471999804178873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,128,0,1,float16,float16,0,0.15205867091814676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,128,0,1,float16,fp8,0,0.1529973347981771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,128,0,1,fp8,fp8,0,0.14628266294797262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,128,0,1,float16,float16,0,0.08541333675384521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,128,0,1,float16,fp8,0,0.15474133690198263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,128,0,1,fp8,fp8,0,0.1492533286412557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,128,0,1,float16,float16,0,0.1530186633268992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,128,0,1,float16,fp8,0,0.08628799517949422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,128,0,1,fp8,fp8,0,0.08518933256467183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,128,0,1,float16,float16,0,0.08392000198364258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,128,0,1,float16,fp8,0,0.08436800042788188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,128,0,1,fp8,fp8,0,0.08117333551247914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,128,0,1,float16,fp8,0,0.08463999629020691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,128,0,1,float16,float16,0,0.08378133177757263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,128,0,1,fp8,fp8,0,0.08084799846013387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,128,0,1,float16,float16,0,0.0842026670773824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,128,0,1,fp8,fp8,0,0.08086400230725606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,128,0,1,float16,fp8,0,0.08434666196505229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,128,0,1,float16,float16,0,0.08356266220410664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,128,0,1,float16,fp8,0,0.08533866206804912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,128,0,1,fp8,fp8,0,0.08090666433175404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,128,0,1,float16,float16,0,0.05165866514046987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,128,0,1,float16,fp8,0,0.05178666611512502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,128,0,1,fp8,fp8,0,0.05017599960168203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,128,0,1,float16,float16,0,0.05040533343950907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,128,0,1,float16,fp8,0,0.0498933345079422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,128,0,1,float16,float16,0,0.04974400003751119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,128,0,1,fp8,fp8,0,0.048154667019844055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,128,0,1,float16,fp8,0,0.05007466673851013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,128,0,1,float16,float16,0,0.05009066561857859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,128,0,1,fp8,fp8,0,0.04930666585763296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,128,0,1,float16,fp8,0,0.04981866478919983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,128,0,1,fp8,fp8,0,0.04987733562787374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,128,0,1,float16,float16,0,0.05043200155099233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,128,0,1,float16,fp8,0,0.04994133114814758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,128,0,1,fp8,fp8,0,0.04987733562787374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,128,0,1,float16,fp8,0,0.033610666791598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,128,0,1,float16,float16,0,0.03366933266321818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,128,0,1,float16,float16,0,0.03309333324432373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,128,0,1,fp8,fp8,0,0.03506666670242945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,128,0,1,float16,fp8,0,0.03196800003449122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,128,0,1,fp8,fp8,0,0.03366400053103765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,128,0,1,float16,float16,0,0.033530667424201965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,128,0,1,float16,fp8,0,0.031856000423431396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,128,0,1,fp8,fp8,0,0.033626665671666466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,128,0,1,float16,float16,0,0.033600000043710075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,128,0,1,float16,fp8,0,0.03363200028737386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,128,0,1,fp8,fp8,0,0.03358400116364161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,128,0,1,float16,float16,0,0.033573334415753685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,128,0,1,float16,fp8,0,0.03365333378314972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,128,0,1,fp8,fp8,0,0.0339626669883728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,128,0,1,float16,float16,0,0.023520000278949738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,128,0,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,128,0,1,fp8,fp8,0,0.023045333723227184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,128,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,128,0,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,128,0,1,fp8,fp8,0,0.02309333284695943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,128,0,1,float16,float16,0,0.023221333821614582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,128,0,1,float16,fp8,0,0.023221333821614582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,128,0,1,fp8,fp8,0,0.02345066765944163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,128,0,1,float16,float16,0,0.023706667125225067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,128,0,1,float16,fp8,0,0.023024000227451324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,128,0,1,fp8,fp8,0,0.02348266790310542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,128,0,1,float16,float16,0,0.02309333284695943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,128,0,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,128,0,1,fp8,fp8,0,0.023402666052182514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,128,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,128,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,128,0,1,fp8,fp8,0,0.019695999721686046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,128,0,1,float16,float16,0,0.01937066639463107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,128,0,1,float16,fp8,0,0.01960533360640208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,128,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,128,0,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,128,0,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,128,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,128,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,128,0,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,128,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,128,0,1,float16,float16,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,128,0,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,128,0,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,128,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,128,0,1,float16,float16,0,0.016730666160583496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,128,0,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,128,0,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,128,0,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,128,0,1,fp8,fp8,0,0.016522667060295742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,128,0,1,float16,float16,0,0.01603200038274129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,128,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,128,0,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,128,0,1,float16,float16,0,0.015466666469971338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,128,0,1,float16,fp8,0,0.016410666207472484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,128,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,128,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,128,0,1,float16,float16,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,128,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,128,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,128,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,128,0,1,fp8,fp8,0,0.016773333152135212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,128,0,1,float16,float16,0,0.015520000209410986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,128,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,128,0,1,float16,fp8,0,0.01632000009218852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,128,0,1,float16,float16,0,0.1267733375231425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,128,0,1,float16,fp8,0,0.1262506643931071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,128,0,1,fp8,fp8,0,0.11984533071517944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,128,0,1,float16,float16,0,0.12852799892425537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,128,0,1,float16,fp8,0,0.12840533256530762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,128,0,1,fp8,fp8,0,0.12006933490435283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,128,0,1,float16,float16,0,0.12841066718101501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,128,0,1,float16,fp8,0,0.1283626655737559
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,128,0,1,fp8,fp8,0,0.12000532944997151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,128,0,1,float16,fp8,0,0.12819733222325644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,128,0,1,fp8,fp8,0,0.12201066811879475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,128,0,1,float16,float16,0,0.12891733646392822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,128,0,1,fp8,fp8,0,0.06846400101979573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,0,0.07148799796899159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,0,0.07258133093516032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,128,0,1,float16,float16,0,0.07090133428573608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,128,0,1,float16,fp8,0,0.07073066631952922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,128,0,1,fp8,fp8,0,0.06817066669464111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,128,0,1,float16,float16,0,0.07050133248170216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,128,0,1,float16,fp8,0,0.07083733379840851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,128,0,1,fp8,fp8,0,0.06826133529345195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,128,0,1,float16,float16,0,0.07054399947325389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,128,0,1,float16,fp8,0,0.07256533205509186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,128,0,1,fp8,fp8,0,0.06821866830190022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,128,0,1,float16,float16,0,0.07031466563542683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,128,0,1,float16,fp8,0,0.0718453327814738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,128,0,1,fp8,fp8,0,0.06853866577148438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,0,0.04371733466784159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,0,0.044031997521718345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,128,0,1,fp8,fp8,0,0.04286933441956838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,128,0,1,float16,float16,0,0.043738668163617454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,128,0,1,float16,fp8,0,0.043807998299598694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,128,0,1,fp8,fp8,0,0.04238399863243103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,128,0,1,float16,float16,0,0.04409066836039225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,128,0,1,float16,fp8,0,0.04365866879622141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,128,0,1,fp8,fp8,0,0.04171200096607208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,128,0,1,float16,float16,0,0.044010668992996216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,128,0,1,float16,fp8,0,0.04390400151411692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,128,0,1,fp8,fp8,0,0.04179200033346812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,128,0,1,float16,float16,0,0.04172799984614054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,128,0,1,float16,fp8,0,0.04345066845417023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,128,0,1,fp8,fp8,0,0.04276266694068909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,0,0.029343999922275543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,0,0.029525332152843475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,128,0,1,fp8,fp8,0,0.029487999776999157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,128,0,1,float16,float16,0,0.02962133288383484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,128,0,1,float16,fp8,0,0.03030933439731598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,128,0,1,fp8,fp8,0,0.029525332152843475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,128,0,1,float16,float16,0,0.02940266579389572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,128,0,1,float16,fp8,0,0.031194667021433514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,128,0,1,float16,float16,0,0.03125333289305369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,128,0,1,fp8,fp8,0,0.03195200115442276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,128,0,1,float16,fp8,0,0.02951466788848241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,128,0,1,fp8,fp8,0,0.029706666866938274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,128,0,1,float16,float16,0,0.029701332251230877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,128,0,1,fp8,fp8,0,0.029311999678611755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,128,0,1,float16,fp8,0,0.029829333225886028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,0,0.023061332603295643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,0,0.023445333043734234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,128,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,128,0,1,float16,fp8,0,0.023562667270501454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,128,0,1,float16,float16,0,0.023455999791622162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,128,0,1,fp8,fp8,0,0.021456000705560047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,128,0,1,float16,float16,0,0.023333333432674408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,128,0,1,float16,fp8,0,0.023706667125225067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,128,0,1,float16,float16,0,0.023247999449570973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,128,0,1,float16,fp8,0,0.024154665569464367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,128,0,1,fp8,fp8,0,0.02388266722361247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,128,0,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,128,0,1,float16,float16,0,0.02330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,128,0,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,128,0,1,fp8,fp8,0,0.025642665723959606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,0,0.020506666352351505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,128,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,128,0,1,float16,fp8,0,0.020106667031844456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,128,0,1,float16,float16,0,0.01952533299724261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,128,0,1,fp8,fp8,0,0.01951466624935468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,128,0,1,float16,float16,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,128,0,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,128,0,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,128,0,1,float16,float16,0,0.021013334393501282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,128,0,1,float16,fp8,0,0.01995733380317688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,128,0,1,fp8,fp8,0,0.020687999824682873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,128,0,1,float16,float16,0,0.02035733312368393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,128,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,128,0,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,128,0,1,float16,float16,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,128,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,128,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,128,0,1,float16,float16,0,0.017530667285124462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,128,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,128,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,128,0,1,float16,float16,0,0.015402667224407196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,128,0,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,128,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,128,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,128,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,128,0,1,float16,float16,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,128,0,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,0,0.015696000307798386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,128,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,128,0,1,float16,fp8,0,0.0162773331006368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,128,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,128,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,128,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,128,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,128,0,1,float16,float16,0,0.015376000354687372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,128,0,1,fp8,fp8,0,6.30081049601237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,128,0,1,float16,float16,0,8.949626922607422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,128,0,1,float16,fp8,0,10.186576207478842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,128,0,1,float16,float16,0,10.03494962056478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,128,0,1,fp8,fp8,0,6.137999852498372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,128,0,1,float16,fp8,0,10.565594355265299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,128,0,1,float16,float16,0,10.307562510172525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,128,0,1,fp8,fp8,0,6.0556589762369795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,128,0,1,float16,fp8,0,10.223183949788412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,128,0,1,float16,float16,0,12.036015828450521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,128,0,1,fp8,fp8,0,6.449984232584636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,128,0,1,float16,float16,0,6.506533304850261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,128,0,1,fp8,fp8,0,3.00600528717041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,128,0,1,float16,fp8,0,11.619711558024088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,128,0,1,float16,fp8,0,5.846773147583008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,128,0,1,float16,float16,0,5.299167950948079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,128,0,1,fp8,fp8,0,2.8497867584228516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,128,0,1,float16,fp8,0,4.547775904337565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,128,0,1,float16,float16,0,5.699034372965495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,128,0,1,fp8,fp8,0,3.2051572799682617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,128,0,1,float16,fp8,0,5.294015884399414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,128,0,1,float16,float16,0,5.000197410583496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,128,0,1,fp8,fp8,0,3.195568084716797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,128,0,1,float16,fp8,0,5.519802729288737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,128,0,1,float16,float16,0,5.653893152872722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,128,0,1,float16,fp8,0,5.076064109802246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,128,0,1,fp8,fp8,0,3.4081811904907227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,128,0,1,float16,float16,0,2.545087973276774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,128,0,1,float16,fp8,0,2.5364373524983725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,128,0,1,fp8,fp8,0,1.580933411916097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,128,0,1,float16,float16,0,1.873301347096761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,128,0,1,float16,fp8,0,2.4971253077189126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,128,0,1,fp8,fp8,0,1.528864065806071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,128,0,1,float16,float16,0,1.7760052680969238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,128,0,1,fp8,fp8,0,1.6588533719380696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,128,0,1,float16,fp8,0,2.601130644480387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,128,0,1,float16,float16,0,1.8549866676330566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,128,0,1,float16,fp8,0,2.2260533968607583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,128,0,1,fp8,fp8,0,1.5399519602457683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,128,0,1,float16,float16,0,2.5967466036478677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,128,0,1,float16,fp8,0,2.492970625559489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,128,0,1,float16,float16,0,1.0217013359069824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,128,0,1,fp8,fp8,0,1.554250717163086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,128,0,1,fp8,fp8,0,0.896608034769694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,128,0,1,float16,float16,0,1.0024960041046143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,128,0,1,float16,fp8,0,1.233674685160319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,128,0,1,float16,fp8,0,0.9946880340576172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,128,0,1,fp8,fp8,0,0.8708053429921468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,128,0,1,float16,float16,0,1.1617973645528157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,128,0,1,float16,fp8,0,1.011621316274007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,128,0,1,fp8,fp8,0,0.8711199760437012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,128,0,1,float16,float16,0,1.0105173587799072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,128,0,1,float16,fp8,0,1.0580053329467773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,128,0,1,fp8,fp8,0,0.8757812976837158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,128,0,1,float16,float16,0,1.012277364730835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,128,0,1,float16,fp8,0,1.0003466606140137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,128,0,1,fp8,fp8,0,0.8816373348236084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,128,0,1,fp8,fp8,0,3.626607894897461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,128,0,1,float16,float16,0,6.381322860717773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,128,0,1,float16,fp8,0,6.074741363525391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,128,0,1,float16,float16,0,5.505503972371419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,128,0,1,fp8,fp8,0,3.9261814753214517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,128,0,1,float16,fp8,0,6.052021026611328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,128,0,1,float16,float16,0,6.350112279256185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,128,0,1,fp8,fp8,0,3.971642812093099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,128,0,1,float16,fp8,0,5.492378870646159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,128,0,1,float16,float16,0,6.250778834025065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,128,0,1,float16,float16,0,2.073520024617513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,128,0,1,fp8,fp8,0,4.135429382324219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,128,0,1,float16,fp8,0,6.818960189819336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,128,0,1,float16,fp8,0,3.0571200052897134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,128,0,1,fp8,fp8,0,1.809567928314209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,128,0,1,float16,float16,0,2.8828748067220054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,128,0,1,float16,fp8,0,2.774127960205078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,128,0,1,fp8,fp8,0,1.7328853607177734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,128,0,1,float16,float16,0,2.2781707445780435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,128,0,1,float16,fp8,0,3.4780801137288413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,128,0,1,fp8,fp8,0,1.9075253804524739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,128,0,1,float16,float16,0,3.0497493743896484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,128,0,1,float16,fp8,0,2.017146587371826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,128,0,1,fp8,fp8,0,1.777664025624593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,128,0,1,float16,float16,0,2.0319199562072754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,128,0,1,fp8,fp8,0,1.7699947357177734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,128,0,1,float16,fp8,0,3.051472028096517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,128,0,1,float16,float16,0,1.189354658126831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,128,0,1,float16,fp8,0,1.1187679767608643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,128,0,1,fp8,fp8,0,1.0310773054758708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,128,0,1,float16,float16,0,1.1010613441467285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,128,0,1,float16,fp8,0,1.1340746879577637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,128,0,1,fp8,fp8,0,0.948853333791097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,128,0,1,float16,float16,0,1.0875626405080159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,128,0,1,float16,fp8,0,1.1981600125630696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,128,0,1,fp8,fp8,0,0.9537226359049479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,128,0,1,float16,float16,0,1.1312320232391357
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,128,0,1,float16,fp8,0,1.1203253269195557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,128,0,1,fp8,fp8,0,1.0294880072275798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,128,0,1,float16,float16,0,1.11898668607076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,128,0,1,fp8,fp8,0,0.9695200125376383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,128,0,1,float16,fp8,0,1.3452213605244954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,128,0,1,float16,float16,0,0.6427786747614542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,128,0,1,fp8,fp8,0,0.5760480165481567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,128,0,1,float16,fp8,0,0.6613920132319132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,128,0,1,float16,fp8,0,0.6243040164311727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,128,0,1,float16,float16,0,0.6198346614837646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,128,0,1,fp8,fp8,0,0.5575786828994751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,128,0,1,float16,float16,0,0.6367146571477255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,128,0,1,fp8,fp8,0,0.6118559837341309
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,128,0,1,float16,fp8,0,0.625274658203125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,128,0,1,float16,float16,0,0.6337706645329794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,128,0,1,float16,fp8,0,0.641205350557963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,128,0,1,fp8,fp8,0,0.5597333510716757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,128,0,1,float16,float16,0,0.6352853377660116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,128,0,1,float16,fp8,0,0.640341321627299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,128,0,1,fp8,fp8,0,0.5689493417739868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,128,0,1,fp8,fp8,0,2.413424015045166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,128,0,1,float16,fp8,0,3.9295358657836914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,128,0,1,float16,float16,0,4.191669464111328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,128,0,1,float16,float16,0,4.216389338175456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,128,0,1,fp8,fp8,0,2.4311680793762207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,128,0,1,float16,fp8,0,3.9011093775431314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,128,0,1,float16,float16,0,4.544597307840983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,128,0,1,float16,fp8,0,4.516245206197103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,128,0,1,fp8,fp8,0,2.6166613896687827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,128,0,1,float16,float16,0,3.4959945678710938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,128,0,1,float16,float16,0,1.8664533297220867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,128,0,1,fp8,fp8,0,2.5990986824035645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,128,0,1,float16,fp8,0,4.649274508158366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,128,0,1,float16,fp8,0,1.556730588277181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,128,0,1,fp8,fp8,0,1.337999979654948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,128,0,1,float16,float16,0,1.55187193552653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,128,0,1,fp8,fp8,0,1.2782026926676433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,128,0,1,float16,fp8,0,2.017728010813395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,128,0,1,float16,float16,0,1.4782293637593586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,128,0,1,float16,fp8,0,2.0410985946655273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,128,0,1,fp8,fp8,0,1.2812960147857666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,128,0,1,float16,float16,0,1.9266400337219238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,128,0,1,float16,fp8,0,1.9435787200927734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,128,0,1,fp8,fp8,0,1.2880266507466633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,128,0,1,float16,float16,0,1.4952747027079265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,128,0,1,float16,float16,0,0.84225066502889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,128,0,1,float16,fp8,0,0.8400692939758301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,128,0,1,fp8,fp8,0,1.3043253421783447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,128,0,1,float16,fp8,0,1.7122453053792317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,128,0,1,fp8,fp8,0,0.7410986423492432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,128,0,1,float16,float16,0,0.8086986541748047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,128,0,1,float16,fp8,0,0.8041813373565674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,128,0,1,fp8,fp8,0,1.0430506865183513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,128,0,1,float16,float16,0,0.8156533241271973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,128,0,1,float16,fp8,0,0.8099786440531412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,128,0,1,fp8,fp8,0,0.7364959716796875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,128,0,1,float16,float16,0,0.8252106507619222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,128,0,1,float16,fp8,0,0.8180106480916342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,128,0,1,fp8,fp8,0,0.7154080073038737
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,128,0,1,float16,float16,0,0.8675519625345866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,128,0,1,float16,fp8,0,1.0374399820963542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,128,0,1,fp8,fp8,0,0.7218186855316162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,128,0,1,float16,float16,0,0.5198293526967367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,128,0,1,float16,fp8,0,0.4939519961675008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,128,0,1,fp8,fp8,0,0.44045865535736084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,128,0,1,float16,float16,0,0.47434135278066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,128,0,1,fp8,fp8,0,0.4206133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,128,0,1,float16,fp8,0,0.4719146490097046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,128,0,1,float16,float16,0,0.48127468427022296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,128,0,1,float16,fp8,0,0.47250668207804364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,128,0,1,fp8,fp8,0,0.42419731616973877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,128,0,1,float16,float16,0,0.4790026744206746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,128,0,1,fp8,fp8,0,0.42758933703104657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,128,0,1,float16,fp8,0,0.4771786530812581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,128,0,1,float16,float16,0,0.48715734481811523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,128,0,1,fp8,fp8,0,0.43137065569559735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,128,0,1,float16,fp8,0,0.48519468307495117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,128,0,1,fp8,fp8,0,3.3580640157063804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,128,0,1,float16,float16,0,5.9078934987386065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,128,0,1,float16,fp8,0,5.856970469156901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,128,0,1,float16,float16,0,5.759503682454427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,128,0,1,float16,fp8,0,4.322357177734375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,128,0,1,fp8,fp8,0,3.6733226776123047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,128,0,1,float16,float16,0,5.4487730662028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,128,0,1,fp8,fp8,0,3.2650934855143228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,128,0,1,float16,fp8,0,6.01093864440918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,128,0,1,float16,float16,0,6.095541636149089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,128,0,1,fp8,fp8,0,3.369743982950846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,128,0,1,float16,float16,0,2.701279958089193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,128,0,1,float16,fp8,0,5.828410466512044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,128,0,1,float16,fp8,0,2.8628854751586914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,128,0,1,fp8,fp8,0,1.7636906305948894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,128,0,1,float16,float16,0,2.696048100789388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,128,0,1,float16,fp8,0,2.2468640009562173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,128,0,1,fp8,fp8,0,1.6975785891215007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,128,0,1,float16,float16,0,1.9153067270914714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,128,0,1,fp8,fp8,0,1.712410608927409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,128,0,1,float16,fp8,0,2.5635093053181968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,128,0,1,float16,fp8,0,1.94705597559611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,128,0,1,float16,float16,0,2.9255733489990234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,128,0,1,fp8,fp8,0,1.6736960411071777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,128,0,1,float16,float16,0,2.6857226689656577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,128,0,1,float16,float16,0,1.1154879728953044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,128,0,1,float16,fp8,0,2.6581013997395835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,128,0,1,fp8,fp8,0,2.006271998087565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,128,0,1,fp8,fp8,0,0.9371573130289713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,128,0,1,float16,fp8,0,1.5148426691691081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,128,0,1,float16,fp8,0,1.12226136525472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,128,0,1,float16,float16,0,1.3865440686543782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,128,0,1,fp8,fp8,0,0.8856213092803955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,128,0,1,float16,float16,0,1.48634672164917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,128,0,1,float16,fp8,0,1.2191840012868245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,128,0,1,fp8,fp8,0,1.109829346338908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,128,0,1,float16,float16,0,1.023850679397583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,128,0,1,fp8,fp8,0,0.8991680145263672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,128,0,1,float16,fp8,0,1.028282642364502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,128,0,1,float16,float16,0,1.0295360088348389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,128,0,1,float16,fp8,0,1.0436000029246013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,128,0,1,float16,float16,0,0.5877013206481934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,128,0,1,fp8,fp8,0,0.9131360054016113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,128,0,1,float16,fp8,0,0.5934826532999674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,128,0,1,fp8,fp8,0,0.6892373561859131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,128,0,1,float16,float16,0,0.5605760018030802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,128,0,1,float16,fp8,0,0.5725013415018717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,128,0,1,fp8,fp8,0,0.591322660446167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,128,0,1,float16,float16,0,0.5683039824167887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,128,0,1,float16,fp8,0,0.5683519840240479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,128,0,1,fp8,fp8,0,0.5059626499811808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,128,0,1,float16,float16,0,0.5711679855982462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,128,0,1,float16,fp8,0,0.5730079809824625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,128,0,1,fp8,fp8,0,0.5217066605885824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,128,0,1,float16,float16,0,0.5807093381881714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,128,0,1,float16,fp8,0,0.5811359882354736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,128,0,1,float16,float16,0,0.36003732681274414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,128,0,1,fp8,fp8,0,0.5181279977162679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,128,0,1,float16,fp8,0,0.3588693141937256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,128,0,1,fp8,fp8,0,0.32207467158635456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,128,0,1,float16,float16,0,0.337226668993632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,128,0,1,float16,fp8,0,0.34091734886169434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,128,0,1,fp8,fp8,0,0.307914674282074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,128,0,1,float16,float16,0,0.3412746588389079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,128,0,1,float16,fp8,0,0.3389493227005005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,128,0,1,fp8,fp8,0,0.3052320082982381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,128,0,1,float16,float16,0,0.3411946694056193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,128,0,1,float16,fp8,0,0.341973344484965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,128,0,1,fp8,fp8,0,0.3095039923985799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,128,0,1,float16,float16,0,0.34493335088094074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,128,0,1,float16,fp8,0,0.34803199768066406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,128,0,1,fp8,fp8,0,0.3169493277867635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,128,0,1,fp8,fp8,0,1.992304007212321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,128,0,1,float16,fp8,0,2.895696004231771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,128,0,1,float16,float16,0,3.1685705184936523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,128,0,1,float16,float16,0,2.2813919385274253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,128,0,1,float16,fp8,0,3.00384521484375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,128,0,1,fp8,fp8,0,2.022549311319987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,128,0,1,float16,float16,0,3.199824015299479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,128,0,1,fp8,fp8,0,2.0256053606669107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,128,0,1,float16,fp8,0,2.915744145711263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,128,0,1,float16,float16,0,3.1783574422200522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,128,0,1,float16,fp8,0,3.12065060933431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,128,0,1,fp8,fp8,0,2.17905060450236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,128,0,1,float16,float16,0,1.6500479380289714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,128,0,1,float16,fp8,0,1.2624853452046711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,128,0,1,fp8,fp8,0,1.1183679898579915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,128,0,1,float16,float16,0,1.1954773267110188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,128,0,1,fp8,fp8,0,1.1695093313852947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,128,0,1,float16,float16,0,1.1880906422932942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,128,0,1,float16,fp8,0,1.3757440249125164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,128,0,1,float16,fp8,0,1.3975787162780762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,128,0,1,fp8,fp8,0,1.041162649790446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,128,0,1,float16,float16,0,1.2120853265126545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,128,0,1,fp8,fp8,0,1.0566879908243816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,128,0,1,float16,fp8,0,1.601088047027588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,128,0,1,float16,float16,0,1.21670397122701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,128,0,1,float16,fp8,0,1.2314773400624592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,128,0,1,float16,float16,0,0.906602700551351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,128,0,1,fp8,fp8,0,1.075055996576945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,128,0,1,float16,fp8,0,0.6816159884134928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,128,0,1,fp8,fp8,0,0.7191253503163656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,128,0,1,float16,float16,0,0.7223359743754069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,128,0,1,float16,fp8,0,0.6443680127461752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,128,0,1,fp8,fp8,0,0.5692319869995117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,128,0,1,float16,float16,0,0.6420319875081381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,128,0,1,fp8,fp8,0,0.566645344098409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,128,0,1,float16,fp8,0,0.6429119904836019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,128,0,1,float16,float16,0,0.6492426792780558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,128,0,1,float16,fp8,0,0.6508533159891764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,128,0,1,fp8,fp8,0,0.5757173299789429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,128,0,1,float16,float16,0,0.6556426684061686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,128,0,1,float16,float16,0,0.388592004776001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,128,0,1,float16,fp8,0,0.6623306671778361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,128,0,1,float16,fp8,0,0.4047199885050456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,128,0,1,fp8,fp8,0,0.6319413185119629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,128,0,1,fp8,fp8,0,0.35040533542633057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,128,0,1,float16,float16,0,0.36774933338165283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,128,0,1,float16,fp8,0,0.3784480094909668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,128,0,1,fp8,fp8,0,0.3277066747347514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,128,0,1,float16,float16,0,0.3644053141276042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,128,0,1,float16,fp8,0,0.3672693173090617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,128,0,1,fp8,fp8,0,0.3570133447647095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,128,0,1,float16,float16,0,0.36735467116038006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,128,0,1,float16,fp8,0,0.37061866124471027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,128,0,1,fp8,fp8,0,0.33355732758839923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,128,0,1,float16,float16,0,0.3732373317082723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,128,0,1,float16,fp8,0,0.3772960106531779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,128,0,1,float16,float16,0,0.2400053342183431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,128,0,1,fp8,fp8,0,0.33963199456532794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,128,0,1,float16,fp8,0,0.23876800139745077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,128,0,1,fp8,fp8,0,0.21894399325052896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,128,0,1,float16,float16,0,0.2227893273035685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,128,0,1,fp8,fp8,0,0.20679465929667154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,128,0,1,float16,fp8,0,0.2235413392384847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,128,0,1,float16,float16,0,0.22431466976801553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,128,0,1,float16,fp8,0,0.2283359964688619
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,128,0,1,fp8,fp8,0,0.20620266596476236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,128,0,1,float16,float16,0,0.2250666618347168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,128,0,1,float16,fp8,0,0.2246239980061849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,128,0,1,fp8,fp8,0,0.20973332722981772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,128,0,1,float16,float16,0,0.2274506688117981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,128,0,1,float16,fp8,0,0.22923199335734049
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,128,0,1,fp8,fp8,0,0.21100266774495444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,128,0,1,fp8,fp8,0,2.0548693339029946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,128,0,1,float16,float16,0,2.517866611480713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,128,0,1,float16,float16,0,2.4524213473002114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,128,0,1,float16,fp8,0,3.0145654678344727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,128,0,1,float16,fp8,0,3.03549861907959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,128,0,1,fp8,fp8,0,2.067237377166748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,128,0,1,float16,float16,0,3.1294933954874673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,128,0,1,float16,fp8,0,3.1734612782796225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,128,0,1,fp8,fp8,0,2.090581258138021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,128,0,1,float16,float16,0,2.4420266151428223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,128,0,1,float16,fp8,0,3.0486186345418296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,128,0,1,fp8,fp8,0,2.1914827028910318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,128,0,1,float16,float16,0,1.306986649831136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,128,0,1,float16,fp8,0,1.2940106391906738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,128,0,1,fp8,fp8,0,1.1914026737213135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,128,0,1,float16,float16,0,1.2075200080871582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,128,0,1,float16,fp8,0,1.3148852984110515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,128,0,1,fp8,fp8,0,1.2134133179982503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,128,0,1,float16,float16,0,1.1935733159383137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,128,0,1,float16,fp8,0,1.2698720296223958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,128,0,1,fp8,fp8,0,1.0568532943725586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,128,0,1,float16,float16,0,1.2040747006734211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,128,0,1,float16,fp8,0,1.3253973325093586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,128,0,1,fp8,fp8,0,1.0654079914093018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,128,0,1,float16,float16,0,1.3720266024271648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,128,0,1,float16,fp8,0,1.3242932955423992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,128,0,1,float16,float16,0,0.6723573207855225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,128,0,1,fp8,fp8,0,1.098954677581787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,128,0,1,float16,fp8,0,0.7442773183186849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,128,0,1,fp8,fp8,0,0.6556853453318278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,128,0,1,float16,float16,0,0.6301173369089762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,128,0,1,float16,fp8,0,0.6326773166656494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,128,0,1,fp8,fp8,0,0.6382026672363281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,128,0,1,float16,float16,0,0.6404426495234171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,128,0,1,float16,fp8,0,0.6372799873352051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,128,0,1,fp8,fp8,0,0.5603679815928141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,128,0,1,float16,float16,0,0.641269326210022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,128,0,1,float16,fp8,0,0.6404639879862467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,128,0,1,fp8,fp8,0,0.5696800152460734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,128,0,1,float16,float16,0,0.6515253384908041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,128,0,1,float16,fp8,0,0.7176053524017334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,128,0,1,fp8,fp8,0,0.5832533439000448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,128,0,1,float16,float16,0,0.3726880153020223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,128,0,1,float16,fp8,0,0.412714680035909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,128,0,1,fp8,fp8,0,0.3385973374048869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,128,0,1,float16,float16,0,0.3452426592508952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,128,0,1,float16,fp8,0,0.34432534376780194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,128,0,1,fp8,fp8,0,0.31220799684524536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,128,0,1,float16,float16,0,0.3495946725209554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,128,0,1,float16,fp8,0,0.34840532143910724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,128,0,1,fp8,fp8,0,0.31298667192459106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,128,0,1,float16,float16,0,0.35317333539326984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,128,0,1,fp8,fp8,0,0.3169813354810079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,128,0,1,float16,fp8,0,0.3521173397699992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,128,0,1,float16,float16,0,0.3611573378245036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,128,0,1,fp8,fp8,0,0.323855996131897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,128,0,1,float16,fp8,0,0.3643893400828044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,128,0,1,float16,float16,0,0.2214240034421285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,128,0,1,fp8,fp8,0,0.20357332626978555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,128,0,1,float16,fp8,0,0.2226346731185913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,128,0,1,float16,float16,0,0.20354666312535605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,128,0,1,fp8,fp8,0,0.18177066246668497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,128,0,1,float16,fp8,0,0.2026240030924479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,128,0,1,float16,float16,0,0.20241065820058188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,128,0,1,fp8,fp8,0,0.18353599309921265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,128,0,1,float16,fp8,0,0.2023893396059672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,128,0,1,float16,float16,0,0.20573866367340088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,128,0,1,fp8,fp8,0,0.1892426609992981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,128,0,1,float16,fp8,0,0.20626666148503622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,128,0,1,float16,float16,0,0.21238933006922403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,128,0,1,fp8,fp8,0,0.19390400250752768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,128,0,1,float16,fp8,0,0.21368533372879028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,128,0,1,float16,float16,0,0.13917332887649536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,128,0,1,fp8,fp8,0,0.13222933808962503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,128,0,1,float16,fp8,0,0.13945600390434265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,128,0,1,float16,float16,0,0.13475733002026877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,128,0,1,fp8,fp8,0,0.12618666887283325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,128,0,1,float16,float16,0,0.13519466916720072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,128,0,1,float16,fp8,0,0.13482133547465006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,128,0,1,float16,fp8,0,0.13424533605575562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,128,0,1,fp8,fp8,0,0.12657066186269125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,128,0,1,float16,float16,0,0.13437333703041077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,128,0,1,float16,fp8,0,0.134853333234787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,128,0,1,fp8,fp8,0,0.12841066718101501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,128,0,1,float16,float16,0,0.1356106698513031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,128,0,1,float16,fp8,0,0.13612266381581625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,128,0,1,fp8,fp8,0,0.12602133552233377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,128,0,1,fp8,fp8,0,1.3192906379699707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,128,0,1,float16,float16,0,1.4883467356363933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,128,0,1,float16,fp8,0,1.5133013725280762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,128,0,1,float16,float16,0,1.501856009165446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,128,0,1,fp8,fp8,0,1.3361172676086426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,128,0,1,float16,fp8,0,1.5047413508097331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,128,0,1,float16,float16,0,1.6795679728190105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,128,0,1,float16,fp8,0,1.5320587158203125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,128,0,1,fp8,fp8,0,1.3585279782613118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,128,0,1,float16,float16,0,1.557637373606364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,128,0,1,fp8,fp8,0,1.4056960741678874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,128,0,1,float16,fp8,0,1.8650186856587727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,128,0,1,float16,float16,0,0.8388160069783529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,128,0,1,float16,fp8,0,0.8524426619211832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,128,0,1,fp8,fp8,0,0.7629226843516032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,128,0,1,float16,fp8,0,0.7773973147074381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,128,0,1,float16,float16,0,0.8282026449839274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,128,0,1,fp8,fp8,0,0.869978666305542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,128,0,1,float16,float16,0,0.7774079640706381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,128,0,1,float16,fp8,0,0.7822026411692301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,128,0,1,fp8,fp8,0,0.7939146359761556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,128,0,1,float16,float16,0,0.7872320016225179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,128,0,1,float16,fp8,0,0.7933812936147054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,128,0,1,fp8,fp8,0,0.7033440272013346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,128,0,1,float16,float16,0,0.8107199668884277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,128,0,1,float16,fp8,0,0.8174560070037842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,128,0,1,fp8,fp8,0,0.7220319906870524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,128,0,1,float16,float16,0,0.4474773406982422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,128,0,1,fp8,fp8,0,0.4652479887008667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,128,0,1,float16,fp8,0,0.45559998353322345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,128,0,1,float16,float16,0,0.41118399302164715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,128,0,1,float16,fp8,0,0.4142773151397705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,128,0,1,fp8,fp8,0,0.36978665987650555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,128,0,1,float16,float16,0,0.4205546776453654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,128,0,1,float16,fp8,0,0.4164693355560303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,128,0,1,fp8,fp8,0,0.3746560017267863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,128,0,1,float16,float16,0,0.41743465264638263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,128,0,1,float16,fp8,0,0.4254879951477051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,128,0,1,fp8,fp8,0,0.37836265563964844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,128,0,1,float16,float16,0,0.4298400084177653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,128,0,1,float16,float16,0,0.2495573361714681
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,128,0,1,float16,fp8,0,0.4341013431549072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,128,0,1,fp8,fp8,0,0.39098668098449707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,128,0,1,float16,fp8,0,0.25541333357493085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,128,0,1,fp8,fp8,0,0.2318399945894877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,128,0,1,float16,float16,0,0.2274186611175537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,128,0,1,float16,fp8,0,0.23031467199325562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,128,0,1,fp8,fp8,0,0.210042675336202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,128,0,1,float16,float16,0,0.22976533571879068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,128,0,1,float16,fp8,0,0.2313973307609558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,128,0,1,fp8,fp8,0,0.21266132593154907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,128,0,1,float16,float16,0,0.23425066471099854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,128,0,1,float16,fp8,0,0.2371093432108561
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,128,0,1,fp8,fp8,0,0.2167946696281433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,128,0,1,float16,float16,0,0.24037333329518637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,128,0,1,float16,fp8,0,0.24386133750279745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,128,0,1,fp8,fp8,0,0.22202666600545248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,128,0,1,float16,float16,0,0.15147733688354492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,128,0,1,float16,fp8,0,0.15236799915631613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,128,0,1,fp8,fp8,0,0.14084800084431967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,128,0,1,float16,float16,0,0.13822399576505026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,128,0,1,float16,fp8,0,0.13909866412480673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,128,0,1,fp8,fp8,0,0.1246506671110789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,128,0,1,float16,float16,0,0.13970133662223816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,128,0,1,float16,fp8,0,0.13825600345929465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,128,0,1,fp8,fp8,0,0.12519466876983643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,128,0,1,float16,float16,0,0.13825066884358725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,128,0,1,float16,fp8,0,0.13990933696428934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,128,0,1,fp8,fp8,0,0.12798933188120523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,128,0,1,float16,float16,0,0.14064000050226846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,128,0,1,float16,fp8,0,0.14297599593798319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,128,0,1,fp8,fp8,0,0.13423466682434082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,128,0,1,float16,float16,0,0.09531733393669128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,128,0,1,float16,fp8,0,0.09880533814430237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,128,0,1,fp8,fp8,0,0.0921493371327718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,128,0,1,float16,float16,0,0.09334400296211243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,128,0,1,float16,fp8,0,0.09474133451779683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,128,0,1,fp8,fp8,0,0.08877866466840108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,128,0,1,float16,float16,0,0.09387733538945515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,128,0,1,float16,fp8,0,0.09517332911491394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,128,0,1,fp8,fp8,0,0.08734400073687236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,128,0,1,float16,float16,0,0.09460266431172688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,128,0,1,float16,fp8,0,0.09458133578300476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,128,0,1,fp8,fp8,0,0.08853866656621297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,128,0,1,float16,float16,0,0.095360000928243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,128,0,1,float16,fp8,0,0.09526399771372478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,128,0,1,fp8,fp8,0,0.08970133463541667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,128,0,1,float16,float16,0,1.6179733276367188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,128,0,1,fp8,fp8,0,1.4491359392801921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,128,0,1,float16,fp8,0,1.6395893096923828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,128,0,1,float16,fp8,0,1.6419413884480794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,128,0,1,float16,float16,0,1.6633386611938477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,128,0,1,fp8,fp8,0,1.4963199297587078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,128,0,1,float16,float16,0,1.6619787216186523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,128,0,1,fp8,fp8,0,1.4983092943827312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,128,0,1,float16,fp8,0,1.6760907173156738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,128,0,1,float16,float16,0,1.748869260152181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,128,0,1,float16,fp8,0,1.7563573519388835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,128,0,1,float16,float16,0,0.9228479862213135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,128,0,1,float16,fp8,0,1.0402026971181233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,128,0,1,fp8,fp8,0,1.5743892987569172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,128,0,1,fp8,fp8,0,0.8475146293640137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,128,0,1,float16,float16,0,0.8300373554229736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,128,0,1,float16,fp8,0,0.8408959706624349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,128,0,1,fp8,fp8,0,0.7439626852671305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,128,0,1,float16,float16,0,0.8394133249918619
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,128,0,1,float16,fp8,0,0.841210683186849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,128,0,1,fp8,fp8,0,0.7545119921366373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,128,0,1,float16,float16,0,0.8591413497924805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,128,0,1,float16,fp8,0,0.8574666976928711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,128,0,1,fp8,fp8,0,0.7654720147450765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,128,0,1,float16,float16,0,0.890618642171224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,128,0,1,float16,fp8,0,0.9828426837921143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,128,0,1,float16,float16,0,0.4821919997533162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,128,0,1,float16,fp8,0,0.4929493268330892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,128,0,1,fp8,fp8,0,0.803376038869222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,128,0,1,fp8,fp8,0,0.5078826745351156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,128,0,1,float16,float16,0,0.43695465723673504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,128,0,1,float16,fp8,0,0.4411199887593587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,128,0,1,fp8,fp8,0,0.39487465222676593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,128,0,1,float16,float16,0,0.440720001856486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,128,0,1,float16,fp8,0,0.44276265303293866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,128,0,1,fp8,fp8,0,0.3965119918187459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,128,0,1,float16,float16,0,0.4448426564534505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,128,0,1,float16,fp8,0,0.4500799973805745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,128,0,1,fp8,fp8,0,0.40174400806427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,128,0,1,float16,float16,0,0.46105066935221356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,128,0,1,float16,fp8,0,0.46828798453013104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,128,0,1,float16,float16,0,0.2606026728947957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,128,0,1,fp8,fp8,0,0.4208000103632609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,128,0,1,float16,fp8,0,0.2669386665026347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,128,0,1,fp8,fp8,0,0.24386133750279745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,128,0,1,float16,float16,0,0.23380800088246664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,128,0,1,float16,fp8,0,0.23945067326227823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,128,0,1,fp8,fp8,0,0.21711466709772745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,128,0,1,float16,float16,0,0.2394239902496338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,128,0,1,float16,fp8,0,0.23883734146753946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,128,0,1,fp8,fp8,0,0.21960532665252686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,128,0,1,float16,float16,0,0.2444159984588623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,128,0,1,float16,fp8,0,0.2480213244756063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,128,0,1,fp8,fp8,0,0.222543994585673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,128,0,1,float16,float16,0,0.2509760061899821
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,128,0,1,float16,fp8,0,0.2567253311475118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,128,0,1,float16,float16,0,0.14910399913787842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,128,0,1,fp8,fp8,0,0.2326186696688334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,128,0,1,float16,fp8,0,0.15176533659299216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,128,0,1,fp8,fp8,0,0.1422719955444336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,128,0,1,float16,float16,0,0.13215999801953635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,128,0,1,float16,fp8,0,0.1307360033194224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,128,0,1,fp8,fp8,0,0.12178132931391399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,128,0,1,float16,float16,0,0.13195733229319254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,128,0,1,float16,fp8,0,0.13217066725095114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,128,0,1,fp8,fp8,0,0.12405332922935486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,128,0,1,float16,float16,0,0.13403733571370444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,128,0,1,float16,fp8,0,0.13554666439692178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,128,0,1,fp8,fp8,0,0.1299199958642324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,128,0,1,float16,float16,0,0.14089600245157877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,128,0,1,float16,fp8,0,0.142767995595932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,128,0,1,fp8,fp8,0,0.13580800096193948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,128,0,1,float16,float16,0,0.08964266379674275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,128,0,1,float16,fp8,0,0.09327999750773112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,128,0,1,fp8,fp8,0,0.0890880028406779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,128,0,1,float16,float16,0,0.08502933382987976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,128,0,1,float16,fp8,0,0.08603200316429138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,128,0,1,fp8,fp8,0,0.07919466495513916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,128,0,1,float16,float16,0,0.08614933490753174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,128,0,1,float16,fp8,0,0.0867146650950114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,128,0,1,fp8,fp8,0,0.08165333171685536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,128,0,1,float16,float16,0,0.08694932858149211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,128,0,1,float16,fp8,0,0.08782399694124858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,128,0,1,fp8,fp8,0,0.0817333310842514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,128,0,1,float16,float16,0,0.08769067128499348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,128,0,1,float16,fp8,0,0.08930666248003642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,128,0,1,fp8,fp8,0,0.0832533339659373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,128,0,1,float16,float16,0,0.06052800019582113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,128,0,1,float16,fp8,0,0.0618399977684021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,128,0,1,fp8,fp8,0,0.05826666454474131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,128,0,1,float16,float16,0,0.0583840012550354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,128,0,1,float16,fp8,0,0.05993066728115082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,128,0,1,fp8,fp8,0,0.05801600217819214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,128,0,1,float16,float16,0,0.058730666836102806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,128,0,1,float16,fp8,0,0.0584853341182073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,128,0,1,fp8,fp8,0,0.0580320010582606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,128,0,1,float16,float16,0,0.05850133299827576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,128,0,1,float16,fp8,0,0.059290667374928795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,128,0,1,fp8,fp8,0,0.05798399945100149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,128,0,1,float16,float16,0,0.05825600028038025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,128,0,1,float16,fp8,0,0.060005332032839455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,128,0,1,fp8,fp8,0,0.05819199979305267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,128,0,1,float16,float16,0,1.1015893618265789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,128,0,1,float16,fp8,0,1.1006240049997966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,128,0,1,fp8,fp8,0,0.9876266320546468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,128,0,1,fp8,fp8,0,1.0006666978200276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,128,0,1,float16,float16,0,1.1131199995676677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,128,0,1,float16,fp8,0,1.1179893016815186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,128,0,1,fp8,fp8,0,1.0155466397603352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,128,0,1,float16,float16,0,1.1356853644053142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,128,0,1,float16,fp8,0,1.1373173395792644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,128,0,1,float16,float16,0,0.636303981145223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,128,0,1,float16,float16,0,1.1922773520151775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,128,0,1,fp8,fp8,0,1.0697387059529622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,128,0,1,float16,fp8,0,1.2029919624328613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,128,0,1,float16,fp8,0,0.649621327718099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,128,0,1,fp8,fp8,0,0.5912266572316488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,128,0,1,float16,float16,0,0.5717759927113851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,128,0,1,float16,fp8,0,0.5698986848195394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,128,0,1,fp8,fp8,0,0.5088106791178385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,128,0,1,float16,float16,0,0.5755039850870768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,128,0,1,fp8,fp8,0,0.5137333472569784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,128,0,1,float16,fp8,0,0.5791946649551392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,128,0,1,float16,float16,0,0.5824960072835287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,128,0,1,float16,fp8,0,0.5874133507410685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,128,0,1,fp8,fp8,0,0.5250773429870605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,128,0,1,float16,float16,0,0.6026506821314493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,128,0,1,float16,fp8,0,0.6092160145441691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,128,0,1,float16,float16,0,0.3349599838256836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,128,0,1,fp8,fp8,0,0.550927996635437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,128,0,1,float16,fp8,0,0.3423413435618083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,128,0,1,fp8,fp8,0,0.31143466631571454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,128,0,1,float16,float16,0,0.2990559935569763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,128,0,1,float16,fp8,0,0.3010293245315552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,128,0,1,fp8,fp8,0,0.27187732855478924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,128,0,1,float16,float16,0,0.30163200696309406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,128,0,1,float16,fp8,0,0.3028106689453125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,128,0,1,fp8,fp8,0,0.27529066801071167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,128,0,1,float16,float16,0,0.30631999174753827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,128,0,1,float16,fp8,0,0.30858665704727173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,128,0,1,fp8,fp8,0,0.2802240053812663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,128,0,1,float16,float16,0,0.31733866532643634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,128,0,1,float16,fp8,0,0.3219839930534363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,128,0,1,float16,float16,0,0.18686399857203165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,128,0,1,fp8,fp8,0,0.29095999399820965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,128,0,1,float16,fp8,0,0.1890666683514913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,128,0,1,fp8,fp8,0,0.17349867026011148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,128,0,1,float16,float16,0,0.1590079963207245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,128,0,1,float16,fp8,0,0.15917866428693137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,128,0,1,fp8,fp8,0,0.15031466881434122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,128,0,1,float16,float16,0,0.1627840002377828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,128,0,1,float16,fp8,0,0.16453333695729574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,128,0,1,fp8,fp8,0,0.15269866585731506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,128,0,1,float16,float16,0,0.16858132680257162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,128,0,1,float16,fp8,0,0.16826132933298746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,128,0,1,fp8,fp8,0,0.15689067045847574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,128,0,1,float16,float16,0,0.17451733350753784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,128,0,1,float16,fp8,0,0.17907732725143433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,128,0,1,fp8,fp8,0,0.1622719963391622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,128,0,1,float16,float16,0,0.10761599739392598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,128,0,1,float16,fp8,0,0.1088106632232666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,128,0,1,fp8,fp8,0,0.10293866197268169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,128,0,1,float16,float16,0,0.0953493316968282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,128,0,1,float16,fp8,0,0.09478400150934856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,128,0,1,fp8,fp8,0,0.08716799815495808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,128,0,1,float16,float16,0,0.09506666660308838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,128,0,1,float16,fp8,0,0.09538132945696513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,128,0,1,fp8,fp8,0,0.08726933598518372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,128,0,1,float16,float16,0,0.09586133559544881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,128,0,1,float16,fp8,0,0.09719467163085938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,128,0,1,fp8,fp8,0,0.09114133318265279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,128,0,1,float16,float16,0,0.09915199875831604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,128,0,1,float16,fp8,0,0.09992532928784688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,128,0,1,fp8,fp8,0,0.09728533029556274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,128,0,1,float16,float16,0,0.06613866488138835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,128,0,1,float16,fp8,0,0.0680159976085027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,128,0,1,fp8,fp8,0,0.06345599889755249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,128,0,1,float16,float16,0,0.06469333171844482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,128,0,1,float16,fp8,0,0.06462400158246358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,128,0,1,fp8,fp8,0,0.05834666887919108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,128,0,1,float16,float16,0,0.06489600241184235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,128,0,1,float16,fp8,0,0.06422399977842967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,128,0,1,fp8,fp8,0,0.05990933378537496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,128,0,1,float16,float16,0,0.0642986645301183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,128,0,1,float16,fp8,0,0.06421866516272227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,128,0,1,fp8,fp8,0,0.06038400034109751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,128,0,1,float16,float16,0,0.0645653357108434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,128,0,1,float16,fp8,0,0.0663679987192154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,128,0,1,fp8,fp8,0,0.06213866670926412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,128,0,1,float16,float16,0,0.050250664353370667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,128,0,1,float16,fp8,0,0.04980800052483877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,128,0,1,fp8,fp8,0,0.047770669062932335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,128,0,1,float16,float16,0,0.048138668139775596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,128,0,1,float16,fp8,0,0.04795200129350027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,128,0,1,fp8,fp8,0,0.04567466676235199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,128,0,1,float16,float16,0,0.0479360024134318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,128,0,1,float16,fp8,0,0.04784533381462097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,128,0,1,fp8,fp8,0,0.04576533536116282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,128,0,1,float16,float16,0,0.04836266736189524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,128,0,1,float16,fp8,0,0.04794666667779287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,128,0,1,fp8,fp8,0,0.047744000951449074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,128,0,1,float16,float16,0,0.048298666874567665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,128,0,1,float16,fp8,0,0.048154667019844055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,128,0,1,fp8,fp8,0,0.04775466521581014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,128,0,1,float16,float16,0,1.1678773562113445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,128,0,1,fp8,fp8,0,1.107151985168457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,128,0,1,float16,fp8,0,1.1588799953460693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,128,0,1,float16,float16,0,1.1852533022562664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,128,0,1,fp8,fp8,0,1.1182560125986736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,128,0,1,float16,fp8,0,1.175546646118164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,128,0,1,float16,float16,0,1.2552906672159831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,128,0,1,float16,float16,0,1.2443146705627441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,128,0,1,fp8,fp8,0,1.335327943166097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,128,0,1,float16,fp8,0,1.2276586691538494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,128,0,1,float16,float16,0,0.6789440313975016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,128,0,1,float16,fp8,0,0.677839994430542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,128,0,1,float16,fp8,0,1.233669360478719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,128,0,1,fp8,fp8,0,1.2757973670959473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,128,0,1,fp8,fp8,0,0.6629813512166342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,128,0,1,float16,float16,0,0.6011093457539877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,128,0,1,float16,fp8,0,0.5940693219502767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,128,0,1,fp8,fp8,0,0.5651040077209473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,128,0,1,float16,float16,0,0.6095786492029825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,128,0,1,float16,fp8,0,0.6046293179194132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,128,0,1,fp8,fp8,0,0.5708160003026327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,128,0,1,float16,float16,0,0.6326719919840494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,128,0,1,float16,fp8,0,0.6287413438161215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,128,0,1,fp8,fp8,0,0.6617813507715861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,128,0,1,float16,float16,0,0.6347946723302206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,128,0,1,float16,fp8,0,0.6293600002924601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,128,0,1,float16,float16,0,0.3537813425064087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,128,0,1,fp8,fp8,0,0.611786683400472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,128,0,1,float16,fp8,0,0.3484799861907959
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,128,0,1,fp8,fp8,0,0.3449386755625407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,128,0,1,float16,float16,0,0.31330132484436035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,128,0,1,float16,fp8,0,0.31196800867716473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,128,0,1,fp8,fp8,0,0.2947946588198344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,128,0,1,float16,float16,0,0.3184799949328105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,128,0,1,float16,fp8,0,0.31835732857386273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,128,0,1,fp8,fp8,0,0.2963786721229553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,128,0,1,float16,float16,0,0.32740267117818195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,128,0,1,float16,fp8,0,0.32958932717641193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,128,0,1,fp8,fp8,0,0.3231733242670695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,128,0,1,float16,float16,0,0.3315146764119466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,128,0,1,float16,fp8,0,0.32730666796366376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,128,0,1,float16,float16,0,0.19082132975260416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,128,0,1,fp8,fp8,0,0.321834663550059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,128,0,1,float16,fp8,0,0.18771199385325113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,128,0,1,fp8,fp8,0,0.18483734130859375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,128,0,1,float16,float16,0,0.1692053278287252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,128,0,1,float16,fp8,0,0.16619199514389038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,128,0,1,fp8,fp8,0,0.15651733676592508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,128,0,1,float16,float16,0,0.17375467220942178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,128,0,1,float16,fp8,0,0.17110933860143027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,128,0,1,fp8,fp8,0,0.16014933586120605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,128,0,1,float16,float16,0,0.17736534277598062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,128,0,1,float16,fp8,0,0.17696533600489298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,128,0,1,fp8,fp8,0,0.170522669951121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,128,0,1,float16,float16,0,0.18354666233062744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,128,0,1,float16,fp8,0,0.1778293251991272
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,128,0,1,fp8,fp8,0,0.17113065719604492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,128,0,1,float16,float16,0,0.10994133353233337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,128,0,1,float16,fp8,0,0.10844799876213074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,128,0,1,fp8,fp8,0,0.10527466734250386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,128,0,1,float16,float16,0,0.09412266810735066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,128,0,1,float16,fp8,0,0.09202133615811665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,128,0,1,fp8,fp8,0,0.08708266417185466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,128,0,1,float16,float16,0,0.09436800082524617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,128,0,1,float16,fp8,0,0.09322133660316467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,128,0,1,fp8,fp8,0,0.08742933471997578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,128,0,1,float16,float16,0,0.0976746678352356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,128,0,1,float16,fp8,0,0.09628267089525859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,128,0,1,fp8,fp8,0,0.0953493316968282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,128,0,1,float16,float16,0,0.10297600428263347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,128,0,1,float16,fp8,0,0.09992000460624695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,128,0,1,fp8,fp8,0,0.09706133604049683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,128,0,1,float16,float16,0,0.061861331264177956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,128,0,1,float16,fp8,0,0.060405333836873375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,128,0,1,fp8,fp8,0,0.06196266909440359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,128,0,1,float16,float16,0,0.05831466615200043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,128,0,1,float16,fp8,0,0.05666666726271311
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,128,0,1,fp8,fp8,0,0.05473599831263224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,128,0,1,float16,float16,0,0.05842133363087972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,128,0,1,float16,fp8,0,0.05787200232346853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,128,0,1,fp8,fp8,0,0.05453333258628845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,128,0,1,float16,float16,0,0.05913599828879038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,128,0,1,float16,fp8,0,0.058378666639328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,128,0,1,fp8,fp8,0,0.05653333167235056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,128,0,1,float16,float16,0,0.057999998331069946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,128,0,1,float16,fp8,0,0.059392000238100685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,128,0,1,fp8,fp8,0,0.05782933533191681
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,128,0,1,float16,float16,0,0.03878399978081385
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,128,0,1,float16,fp8,0,0.03942399968703588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,128,0,1,fp8,fp8,0,0.039877332746982574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,128,0,1,float16,float16,0,0.037589333951473236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,128,0,1,float16,fp8,0,0.03745066622893015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,128,0,1,fp8,fp8,0,0.03772266705830892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,128,0,1,float16,float16,0,0.037903999288876854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,128,0,1,float16,fp8,0,0.03791466603676478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,128,0,1,fp8,fp8,0,0.03587199995915095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,128,0,1,float16,float16,0,0.039642666776975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,128,0,1,float16,fp8,0,0.03750933210055033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,128,0,1,fp8,fp8,0,0.03788266579310099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,128,0,1,float16,float16,0,0.03751999884843826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,128,0,1,float16,fp8,0,0.03745066622893015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,128,0,1,float16,float16,0,0.03364799916744232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,128,0,1,fp8,fp8,0,0.037802666425704956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,128,0,1,float16,fp8,0,0.033439998825391136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,128,0,1,fp8,fp8,0,0.03331200033426285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,128,0,1,float16,float16,0,0.03173866619666418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,128,0,1,float16,fp8,0,0.031727999448776245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,128,0,1,fp8,fp8,0,0.03162666658560435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,128,0,1,float16,float16,0,0.03316800047953924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,128,0,1,float16,fp8,0,0.03335466732581457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,128,0,1,fp8,fp8,0,0.031514666974544525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,128,0,1,float16,float16,0,0.03357866654793421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,128,0,1,float16,fp8,0,0.03339199970165888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,128,0,1,fp8,fp8,0,0.033402666449546814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,128,0,1,float16,float16,0,0.03349333256483078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,128,0,1,float16,fp8,0,0.033344000577926636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,128,0,1,fp8,fp8,0,0.03129599988460541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,128,0,1,float16,float16,0,0.9974880218505859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,128,0,1,float16,fp8,0,0.9881386756896973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,128,0,1,fp8,fp8,0,0.9483146667480469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,128,0,1,float16,float16,0,1.0133492946624756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,128,0,1,fp8,fp8,0,0.959824005762736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,128,0,1,float16,fp8,0,1.0047893524169922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,128,0,1,float16,float16,0,1.0895520051320393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,128,0,1,float16,fp8,0,1.063695987065633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,128,0,1,float16,float16,0,1.0687519709269206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,128,0,1,fp8,fp8,0,1.1813973585764568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,128,0,1,float16,fp8,0,1.0474560260772705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,128,0,1,float16,float16,0,0.591818650563558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,128,0,1,float16,fp8,0,0.5882826646169027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,128,0,1,fp8,fp8,0,1.055573304494222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,128,0,1,float16,float16,0,0.5113759835561117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,128,0,1,fp8,fp8,0,0.5846399863560995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,128,0,1,float16,fp8,0,0.5080426534016927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,128,0,1,fp8,fp8,0,0.4870719909667969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,128,0,1,float16,float16,0,0.5197973251342773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,128,0,1,fp8,fp8,0,0.4902240037918091
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,128,0,1,float16,fp8,0,0.5187199910481771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,128,0,1,float16,float16,0,0.5407893260320028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,128,0,1,float16,fp8,0,0.5363893508911133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,128,0,1,fp8,fp8,0,0.5802453358968099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,128,0,1,float16,float16,0,0.5502346754074097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,128,0,1,float16,fp8,0,0.5394506851832072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,128,0,1,float16,float16,0,0.308351993560791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,128,0,1,float16,fp8,0,0.3019733428955078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,128,0,1,fp8,fp8,0,0.5315413475036621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,128,0,1,fp8,fp8,0,0.30107732613881427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,128,0,1,float16,float16,0,0.2654026746749878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,128,0,1,float16,fp8,0,0.26557334264119464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,128,0,1,fp8,fp8,0,0.2523146669069926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,128,0,1,float16,float16,0,0.2681066592534383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,128,0,1,float16,fp8,0,0.2681066592534383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,128,0,1,fp8,fp8,0,0.2550453344980876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,128,0,1,float16,float16,0,0.2820799946784973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,128,0,1,float16,fp8,0,0.28093866507212323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,128,0,1,fp8,fp8,0,0.27878399689992267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,128,0,1,float16,float16,0,0.28643200794855755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,128,0,1,float16,fp8,0,0.2820533315340678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,128,0,1,float16,float16,0,0.16451199849446616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,128,0,1,fp8,fp8,0,0.27906133731206256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,128,0,1,float16,fp8,0,0.16339733203252158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,128,0,1,float16,float16,0,0.14108799894650778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,128,0,1,fp8,fp8,0,0.16276266177495322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,128,0,1,float16,fp8,0,0.14154133200645447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,128,0,1,fp8,fp8,0,0.13432000080744425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,128,0,1,float16,float16,0,0.14575999975204468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,128,0,1,float16,fp8,0,0.1434346636136373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,128,0,1,fp8,fp8,0,0.13665599624315897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,128,0,1,float16,float16,0,0.15158933401107788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,128,0,1,float16,fp8,0,0.1495039959748586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,128,0,1,fp8,fp8,0,0.1463466684023539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,128,0,1,float16,float16,0,0.15477333466211954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,128,0,1,float16,fp8,0,0.15205867091814676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,128,0,1,fp8,fp8,0,0.14750400185585022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,128,0,1,float16,float16,0,0.09513066212336223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,128,0,1,float16,fp8,0,0.09180266658465068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,128,0,1,fp8,fp8,0,0.0916319986184438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,128,0,1,float16,float16,0,0.078575998544693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,128,0,1,float16,fp8,0,0.07805333534876506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,128,0,1,fp8,fp8,0,0.07451733450094859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,128,0,1,float16,float16,0,0.07985599835713704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,128,0,1,float16,fp8,0,0.07904000083605449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,128,0,1,fp8,fp8,0,0.07486933469772339
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,128,0,1,float16,float16,0,0.08336533109347026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,128,0,1,float16,fp8,0,0.08217599987983704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,128,0,1,fp8,fp8,0,0.08269333342711131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,128,0,1,float16,float16,0,0.08664000034332275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,128,0,1,float16,fp8,0,0.08415466547012329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,128,0,1,float16,float16,0,0.05273066461086273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,128,0,1,fp8,fp8,0,0.08548800150553386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,128,0,1,float16,fp8,0,0.052005335688591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,128,0,1,fp8,fp8,0,0.054010664423306785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,128,0,1,float16,float16,0,0.04827199876308441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,128,0,1,float16,fp8,0,0.04784533381462097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,128,0,1,float16,float16,0,0.04806933303674062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,128,0,1,float16,fp8,0,0.049626668294270836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,128,0,1,fp8,fp8,0,0.045978665351867676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,128,0,1,fp8,fp8,0,0.04574400186538696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,128,0,1,float16,float16,0,0.04996266464392344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,128,0,1,float16,fp8,0,0.05022933085759481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,128,0,1,float16,float16,0,0.050000001986821495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,128,0,1,fp8,fp8,0,0.04980266590913137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,128,0,1,float16,fp8,0,0.05018133421738943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,128,0,1,float16,float16,0,0.03356266766786575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,128,0,1,fp8,fp8,0,0.048298666874567665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,128,0,1,float16,fp8,0,0.03369600077470144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,128,0,1,fp8,fp8,0,0.03338133295377096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,128,0,1,float16,float16,0,0.0315733328461647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,128,0,1,float16,fp8,0,0.03172266731659571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,128,0,1,fp8,fp8,0,0.031354665756225586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,128,0,1,float16,float16,0,0.03151999910672506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,128,0,1,float16,fp8,0,0.03137599925200144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,128,0,1,fp8,fp8,0,0.03138133386770884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,128,0,1,float16,fp8,0,0.0322826678554217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,128,0,1,float16,float16,0,0.03342933456103007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,128,0,1,fp8,fp8,0,0.03161066770553589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,128,0,1,float16,float16,0,0.03165333221356074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,128,0,1,float16,fp8,0,0.03329066683848699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,128,0,1,fp8,fp8,0,0.03340800106525421
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,128,0,1,float16,float16,0,0.027424000203609467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,128,0,1,float16,fp8,0,0.02754133443037669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,128,0,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,128,0,1,float16,float16,0,0.02740799884001414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,128,0,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,128,0,1,fp8,fp8,0,0.025616000096003216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,128,0,1,float16,float16,0,0.027466667195161183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,128,0,1,float16,fp8,0,0.027263998985290527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,128,0,1,fp8,fp8,0,0.027215999861558277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,128,0,1,float16,float16,0,0.02731200059254964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,128,0,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,128,0,1,fp8,fp8,0,0.02625600000222524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,128,0,1,float16,float16,0,0.02757333219051361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,128,0,1,float16,fp8,0,0.027029333015282948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,128,0,1,fp8,fp8,0,0.026911998788515728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,128,0,1,float16,float16,0,0.02537599951028824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,128,0,1,float16,fp8,0,0.024154665569464367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,128,0,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,128,0,1,float16,float16,0,0.02514133354028066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,128,0,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,128,0,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,128,0,1,float16,float16,0,0.025125332176685333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,128,0,1,float16,fp8,0,0.02513066679239273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,128,0,1,float16,float16,0,0.02372266600529353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,128,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,128,0,1,float16,fp8,0,0.023402666052182514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,128,0,1,float16,float16,0,0.02345066765944163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,128,0,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,128,0,1,fp8,fp8,0,0.0236160010099411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,128,0,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,128,0,1,float16,float16,0,0.4594133297602336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,128,0,1,float16,fp8,0,0.45032533009847003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,128,0,1,float16,float16,0,0.47259732087453205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,128,0,1,fp8,fp8,0,0.4412320057551066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,128,0,1,fp8,fp8,0,0.44669334093729657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,128,0,1,float16,fp8,0,0.46509865919748944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,128,0,1,float16,float16,0,0.4964640140533447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,128,0,1,float16,fp8,0,0.4896959861119588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,128,0,1,fp8,fp8,0,0.5304319858551025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,128,0,1,float16,float16,0,0.5011680126190186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,128,0,1,float16,fp8,0,0.491215984026591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,128,0,1,float16,float16,0,0.2829493284225464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,128,0,1,fp8,fp8,0,0.5029013156890869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,128,0,1,float16,fp8,0,0.2741919954617818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,128,0,1,fp8,fp8,0,0.2807199954986572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,128,0,1,float16,float16,0,0.23813867568969727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,128,0,1,float16,fp8,0,0.2361066738764445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,128,0,1,fp8,fp8,0,0.22851200898488364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,128,0,1,float16,float16,0,0.24609067042668661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,128,0,1,float16,fp8,0,0.24381866057713827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,128,0,1,fp8,fp8,0,0.2325119972229004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,128,0,1,float16,float16,0,0.258517324924469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,128,0,1,float16,fp8,0,0.2535039981206258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,128,0,1,fp8,fp8,0,0.26336532831192017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,128,0,1,float16,float16,0,0.2630400061607361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,128,0,1,float16,fp8,0,0.25707733631134033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,128,0,1,fp8,fp8,0,0.25439467032750446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,128,0,1,float16,float16,0,0.1525759994983673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,128,0,1,float16,fp8,0,0.14910933375358582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,128,0,1,fp8,fp8,0,0.15102400382359824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,128,0,1,float16,float16,0,0.12595733006795248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,128,0,1,float16,fp8,0,0.12364799777666728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,128,0,1,fp8,fp8,0,0.12178666392962138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,128,0,1,float16,float16,0,0.12966400384902954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,128,0,1,float16,fp8,0,0.12929600477218628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,128,0,1,fp8,fp8,0,0.1254026691118876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,128,0,1,float16,float16,0,0.13751999537150064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,128,0,1,float16,fp8,0,0.1357919971148173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,128,0,1,fp8,fp8,0,0.1378506620724996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,128,0,1,float16,float16,0,0.1402400036652883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,128,0,1,float16,fp8,0,0.13846400380134583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,128,0,1,fp8,fp8,0,0.13672533631324768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,128,0,1,float16,float16,0,0.08885866403579712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,128,0,1,float16,fp8,0,0.08724799752235413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,128,0,1,fp8,fp8,0,0.08705066641171773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,128,0,1,float16,float16,0,0.0711359977722168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,128,0,1,float16,fp8,0,0.06945600112279256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,128,0,1,fp8,fp8,0,0.06816533207893372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,128,0,1,float16,float16,0,0.07286400099595387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,128,0,1,float16,fp8,0,0.07128533224264781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,128,0,1,fp8,fp8,0,0.06789866586526234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,128,0,1,float16,float16,0,0.07554133236408234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,128,0,1,float16,fp8,0,0.07468266785144806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,128,0,1,fp8,fp8,0,0.07666133344173431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,128,0,1,float16,float16,0,0.07853866616884868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,128,0,1,float16,fp8,0,0.07623466849327087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,128,0,1,fp8,fp8,0,0.07859733204046886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,128,0,1,float16,float16,0,0.046298667788505554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,128,0,1,float16,fp8,0,0.04566933214664459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,128,0,1,fp8,fp8,0,0.04773333172003428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,128,0,1,float16,float16,0,0.04162666698296865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,128,0,1,float16,fp8,0,0.04228800038496653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,128,0,1,fp8,fp8,0,0.04165333261092504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,128,0,1,float16,float16,0,0.043712000052134194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,128,0,1,float16,fp8,0,0.04376000165939331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,128,0,1,fp8,fp8,0,0.041264000038305916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,128,0,1,float16,float16,0,0.04381333291530609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,128,0,1,float16,fp8,0,0.04368533194065094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,128,0,1,fp8,fp8,0,0.044250667095184326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,128,0,1,float16,float16,0,0.04540266593297323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,128,0,1,float16,fp8,0,0.043663998444875084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,128,0,1,fp8,fp8,0,0.043920000394185386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,128,0,1,float16,float16,0,0.03141333411137263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,128,0,1,float16,fp8,0,0.029520000020662945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,128,0,1,fp8,fp8,0,0.03149333347876867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,128,0,1,float16,float16,0,0.02834133307139079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,128,0,1,float16,fp8,0,0.029530666768550873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,128,0,1,fp8,fp8,0,0.02757333219051361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,128,0,1,float16,float16,0,0.02938666691382726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,128,0,1,float16,fp8,0,0.029370665550231934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,128,0,1,fp8,fp8,0,0.028042666614055634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,128,0,1,float16,float16,0,0.029674666623274486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,128,0,1,float16,fp8,0,0.029520000020662945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,128,0,1,fp8,fp8,0,0.029493334392706554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,128,0,1,float16,float16,0,0.029669334491093952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,128,0,1,float16,fp8,0,0.029839999973773956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,128,0,1,fp8,fp8,0,0.03129599988460541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,128,0,1,float16,float16,0,0.024864000578721363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,128,0,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,128,0,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,128,0,1,float16,float16,0,0.0233599990606308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,128,0,1,float16,fp8,0,0.025248001019159954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,128,0,1,fp8,fp8,0,0.023077333966890972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,128,0,1,float16,float16,0,0.02513599892457326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,128,0,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,128,0,1,fp8,fp8,0,0.023247999449570973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,128,0,1,float16,float16,0,0.02508266766866048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,128,0,1,float16,fp8,0,0.025381334125995636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,128,0,1,fp8,fp8,0,0.023525332411130268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,128,0,1,float16,float16,0,0.025066666305065155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,128,0,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,128,0,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,128,0,1,float16,float16,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,128,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,128,0,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,128,0,1,float16,float16,0,0.021045332153638203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,128,0,1,float16,fp8,0,0.021381333470344543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,128,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,128,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,128,0,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,128,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,128,0,1,float16,float16,0,0.02179733415444692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,128,0,1,float16,fp8,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,128,0,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,128,0,1,float16,float16,0,0.02181333303451538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,128,0,1,float16,fp8,0,0.021525333325068157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,128,0,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,128,0,1,float16,float16,0,0.021040000021457672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,128,0,1,float16,fp8,0,0.020389333367347717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,128,0,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,128,0,1,float16,float16,0,0.020432000358899433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,128,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,128,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,128,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,128,0,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,128,0,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,128,0,1,float16,float16,0,0.020938667158285778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,128,0,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,128,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,128,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,128,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,128,0,1,fp8,fp8,0,0.020560000091791153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,128,0,1,float16,float16,0,0.2475093404452006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,128,0,1,float16,fp8,0,0.2450773318608602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,128,0,1,fp8,fp8,0,0.2466986576716105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,128,0,1,float16,float16,0,0.25408534208933514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,128,0,1,float16,fp8,0,0.2471146583557129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,128,0,1,fp8,fp8,0,0.24687467018763223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,128,0,1,float16,float16,0,0.2701279918352763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,128,0,1,float16,fp8,0,0.2638133366902669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,128,0,1,fp8,fp8,0,0.26981866359710693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,128,0,1,float16,fp8,0,0.2654986580212911
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,128,0,1,float16,float16,0,0.272597332795461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,128,0,1,fp8,fp8,0,0.2752053340276082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,128,0,1,float16,float16,0,0.15497066577275595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,128,0,1,float16,fp8,0,0.15176000197728476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,128,0,1,fp8,fp8,0,0.15647466977437338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,128,0,1,float16,float16,0,0.13179199894269308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,128,0,1,float16,fp8,0,0.13039466738700867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,128,0,1,fp8,fp8,0,0.12990400195121765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,128,0,1,float16,float16,0,0.13476266463597616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,128,0,1,float16,fp8,0,0.13235732913017273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,128,0,1,fp8,fp8,0,0.1320693294207255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,128,0,1,float16,fp8,0,0.1402079959710439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,128,0,1,float16,float16,0,0.14435199896494547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,128,0,1,fp8,fp8,0,0.14078933000564575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,128,0,1,float16,float16,0,0.14662933349609375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,128,0,1,float16,fp8,0,0.14138666788736978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,128,0,1,fp8,fp8,0,0.14443199833234152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,128,0,1,float16,float16,0,0.08710933725039165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,128,0,1,float16,fp8,0,0.08563733100891113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,128,0,1,fp8,fp8,0,0.08910399675369263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,128,0,1,float16,float16,0,0.07236800094445546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,128,0,1,float16,fp8,0,0.07083199918270111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,128,0,1,fp8,fp8,0,0.07043733199437459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,128,0,1,float16,float16,0,0.07446933289368947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,128,0,1,float16,fp8,0,0.0727040022611618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,128,0,1,fp8,fp8,0,0.07082666456699371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,128,0,1,float16,float16,0,0.07857066889603932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,128,0,1,float16,fp8,0,0.07667733232180278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,128,0,1,fp8,fp8,0,0.07704000174999237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,128,0,1,float16,float16,0,0.08063466846942902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,128,0,1,float16,float16,0,0.04782933493455251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,128,0,1,float16,fp8,0,0.07670933504899342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,128,0,1,fp8,fp8,0,0.07958933214346568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,128,0,1,float16,fp8,0,0.04805333415667216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,128,0,1,fp8,fp8,0,0.04977599779764811
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,128,0,1,float16,float16,0,0.043696001172065735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,128,0,1,fp8,fp8,0,0.04205333193143209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,128,0,1,float16,float16,0,0.04563199977080027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,128,0,1,float16,fp8,0,0.045050665736198425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,128,0,1,float16,fp8,0,0.045653333266576133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,128,0,1,fp8,fp8,0,0.04381333291530609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,128,0,1,float16,float16,0,0.046112000942230225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,128,0,1,float16,fp8,0,0.046069333950678505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,128,0,1,fp8,fp8,0,0.04633066554864248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,128,0,1,float16,float16,0,0.047237331668535866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,128,0,1,float16,fp8,0,0.0459199994802475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,128,0,1,fp8,fp8,0,0.046282668908437095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,128,0,1,float16,float16,0,0.03215999901294708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,128,0,1,float16,fp8,0,0.03207999964555105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,128,0,1,fp8,fp8,0,0.03187733391920725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,128,0,1,float16,float16,0,0.02956799914439519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,128,0,1,fp8,fp8,0,0.029317334294319153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,128,0,1,float16,fp8,0,0.030031998952229817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,128,0,1,float16,float16,0,0.029322666426499683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,128,0,1,float16,fp8,0,0.029370665550231934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,128,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,128,0,1,float16,fp8,0,0.031082667410373688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,128,0,1,float16,float16,0,0.03189333279927572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,128,0,1,fp8,fp8,0,0.03001066545645396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,128,0,1,float16,float16,0,0.03125333289305369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,128,0,1,float16,fp8,0,0.03156266609827677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,128,0,1,float16,float16,0,0.02143466720978419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,128,0,1,fp8,fp8,0,0.031397332747777305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,128,0,1,float16,fp8,0,0.02146666745344798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,128,0,1,fp8,fp8,0,0.02162133405605952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,128,0,1,float16,float16,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,128,0,1,float16,fp8,0,0.021514666577180225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,128,0,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,128,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,128,0,1,float16,fp8,0,0.021525333325068157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,128,0,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,128,0,1,float16,float16,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,128,0,1,float16,fp8,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,128,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,128,0,1,float16,float16,0,0.02182399978240331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,128,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,128,0,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,128,0,1,float16,float16,0,0.0195573332409064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,128,0,1,float16,fp8,0,0.0195573332409064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,128,0,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,128,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,128,0,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,128,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,128,0,1,float16,float16,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,128,0,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,128,0,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,128,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,128,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,128,0,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,128,0,1,float16,float16,0,0.019120000302791595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,128,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,128,0,1,fp8,fp8,0,0.01836266616980235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,128,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,128,0,1,float16,fp8,0,0.017770666629076004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,128,0,1,float16,fp8,0,0.017621333400408428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,128,0,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,128,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,128,0,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,128,0,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,128,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,128,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,128,0,1,float16,fp8,0,0.01884799947341283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,128,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,128,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,128,0,1,float16,fp8,0,0.01886933296918869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,128,0,1,float16,float16,0,0.16657066345214844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,128,0,1,float16,fp8,0,0.16686399777730307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,128,0,1,fp8,fp8,0,0.1628320018450419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,128,0,1,float16,float16,0,0.1705066760381063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,128,0,1,float16,fp8,0,0.1670560042063395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,128,0,1,fp8,fp8,0,0.16356266538302103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,128,0,1,float16,float16,0,0.17331733306248984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,128,0,1,float16,fp8,0,0.17584532499313354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,128,0,1,fp8,fp8,0,0.17358932892481485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,128,0,1,float16,float16,0,0.17537067333857217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,128,0,1,float16,fp8,0,0.17643733819325766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,128,0,1,fp8,fp8,0,0.1768746574719747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,128,0,1,float16,float16,0,0.10457600156466167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,128,0,1,float16,fp8,0,0.1023466686407725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,128,0,1,fp8,fp8,0,0.10455466310183208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,128,0,1,float16,float16,0,0.09175466497739156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,128,0,1,float16,fp8,0,0.09172800183296204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,128,0,1,fp8,fp8,0,0.08794666330019633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,128,0,1,float16,float16,0,0.09128533800443013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,128,0,1,fp8,fp8,0,0.08904000123341878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,128,0,1,float16,float16,0,0.09699733058611552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,128,0,1,float16,fp8,0,0.09191466371218364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,128,0,1,float16,fp8,0,0.09573333462079366
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,128,0,1,fp8,fp8,0,0.09672533472379048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,128,0,1,float16,float16,0,0.09733866651852925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,128,0,1,float16,fp8,0,0.09809600313504536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,128,0,1,float16,float16,0,0.0565226674079895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,128,0,1,fp8,fp8,0,0.09687999884287517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,128,0,1,float16,fp8,0,0.05412266651789347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,128,0,1,float16,float16,0,0.052058666944503784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,128,0,1,fp8,fp8,0,0.05811200042565664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,128,0,1,float16,fp8,0,0.05170666674772898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,128,0,1,fp8,fp8,0,0.04991999765237173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,128,0,1,float16,float16,0,0.052149335543314614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,128,0,1,fp8,fp8,0,0.05027199784914652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,128,0,1,float16,float16,0,0.053818667928377785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,128,0,1,float16,fp8,0,0.052144000927607216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,128,0,1,float16,fp8,0,0.05398400127887726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,128,0,1,fp8,fp8,0,0.054085334142049156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,128,0,1,float16,float16,0,0.05402666827042898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,128,0,1,float16,fp8,0,0.054330666859944664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,128,0,1,fp8,fp8,0,0.05287999908129374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,128,0,1,float16,float16,0,0.03595199932654699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,128,0,1,float16,fp8,0,0.035504000882307686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,128,0,1,fp8,fp8,0,0.03730666637420654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,128,0,1,float16,float16,0,0.03565333286921183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,128,0,1,float16,fp8,0,0.03563733398914337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,128,0,1,fp8,fp8,0,0.03385599950949351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,128,0,1,float16,fp8,0,0.035749333600203194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,128,0,1,float16,float16,0,0.03522133330504099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,128,0,1,fp8,fp8,0,0.03350399931271871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,128,0,1,float16,float16,0,0.035717333356539406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,128,0,1,float16,fp8,0,0.035274667044480644
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,128,0,1,fp8,fp8,0,0.035631999373435974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,128,0,1,float16,float16,0,0.03573866685231527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,128,0,1,float16,fp8,0,0.035317334036032356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,128,0,1,float16,float16,0,0.025370667378107708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,128,0,1,fp8,fp8,0,0.03604800005753835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,128,0,1,float16,fp8,0,0.024559999505678814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,128,0,1,float16,float16,0,0.02526933451493581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,128,0,1,float16,fp8,0,0.02349333216746648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,128,0,1,fp8,fp8,0,0.02515733242034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,128,0,1,fp8,fp8,0,0.024586667617162068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,128,0,1,float16,float16,0,0.024933333198229473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,128,0,1,float16,fp8,0,0.024906667570273083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,128,0,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,128,0,1,float16,float16,0,0.02369600037733714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,128,0,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,128,0,1,fp8,fp8,0,0.025274666647116344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,128,0,1,float16,float16,0,0.02347733328739802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,128,0,1,float16,fp8,0,0.023525332411130268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,128,0,1,fp8,fp8,0,0.02526933451493581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,128,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,128,0,1,float16,fp8,0,0.018272000054518383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,128,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,128,0,1,float16,float16,0,0.019120000302791595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,128,0,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,128,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,128,0,1,float16,float16,0,0.019152000546455383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,128,0,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,128,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,128,0,1,float16,float16,0,0.019152000546455383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,128,0,1,float16,fp8,0,0.01759999990463257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,128,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,128,0,1,float16,float16,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,128,0,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,128,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,128,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,128,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,128,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,128,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,128,0,1,fp8,fp8,0,0.017498667041460674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,128,0,1,float16,float16,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,128,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,128,0,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,128,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,128,0,1,fp8,fp8,0,0.015573333948850632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,128,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,128,0,1,float16,float16,0,0.015562667200962702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,128,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,128,0,1,fp8,fp8,0,0.016693333784739178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,128,0,1,fp8,fp8,0,0.017621333400408428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,128,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,128,0,1,fp8,fp8,0,0.01640533283352852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,128,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,128,0,1,fp8,fp8,0,0.01748266691962878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,128,0,1,float16,float16,0,0.12732266386349997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,128,0,1,float16,fp8,0,0.12759466965993246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,128,0,1,fp8,fp8,0,0.12185600399971008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,128,0,1,float16,float16,0,0.13147733608881632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,128,0,1,float16,fp8,0,0.1281706690788269
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,128,0,1,fp8,fp8,0,0.12198932965596516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,128,0,1,float16,float16,0,0.13177067041397095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,128,0,1,float16,fp8,0,0.1325706640879313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,128,0,1,fp8,fp8,0,0.1283573309580485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,128,0,1,float16,float16,0,0.13173333803812662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,128,0,1,float16,float16,0,0.07412800192832947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,128,0,1,fp8,fp8,0,0.13030933340390524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,128,0,1,float16,fp8,0,0.13172266880671182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,128,0,1,float16,fp8,0,0.07474133372306824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,128,0,1,fp8,fp8,0,0.0726560006539027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,128,0,1,float16,float16,0,0.07073600093523662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,128,0,1,float16,fp8,0,0.07076266904671986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,128,0,1,fp8,fp8,0,0.06850133339564006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,128,0,1,float16,float16,0,0.0715946654478709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,128,0,1,float16,fp8,0,0.07074666519959767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,128,0,1,fp8,fp8,0,0.06803733110427856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,128,0,1,float16,float16,0,0.07262399792671204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,128,0,1,float16,fp8,0,0.07066133121649425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,128,0,1,fp8,fp8,0,0.0706826647122701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,128,0,1,float16,float16,0,0.07212799787521362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,128,0,1,float16,fp8,0,0.07057600220044453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,128,0,1,fp8,fp8,0,0.07065066695213318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,128,0,1,float16,float16,0,0.04393066465854645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,128,0,1,float16,fp8,0,0.04397333165009817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,128,0,1,fp8,fp8,0,0.04346133271853129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,128,0,1,float16,fp8,0,0.043765331308046974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,128,0,1,float16,float16,0,0.04200533529122671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,128,0,1,fp8,fp8,0,0.04161600023508072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,128,0,1,float16,float16,0,0.04390933116277059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,128,0,1,fp8,fp8,0,0.04167466859022776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,128,0,1,float16,fp8,0,0.04221866528193156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,128,0,1,float16,float16,0,0.04411733150482178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,128,0,1,float16,fp8,0,0.04398933549722036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,128,0,1,fp8,fp8,0,0.0440586656332016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,128,0,1,float16,fp8,0,0.043663998444875084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,128,0,1,float16,float16,0,0.043791999419530235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,128,0,1,fp8,fp8,0,0.044453332821528115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,128,0,1,float16,float16,0,0.02943466603755951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,128,0,1,float16,fp8,0,0.029445332785447437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,128,0,1,fp8,fp8,0,0.02938666691382726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,128,0,1,float16,fp8,0,0.029311999678611755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,128,0,1,float16,float16,0,0.027376001079877216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,128,0,1,fp8,fp8,0,0.02773866554101308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,128,0,1,float16,float16,0,0.02926933268706004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,128,0,1,float16,fp8,0,0.028736000259717304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,128,0,1,fp8,fp8,0,0.029450667401154835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,128,0,1,float16,float16,0,0.02939733366171519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,128,0,1,float16,fp8,0,0.02937600016593933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,128,0,1,fp8,fp8,0,0.029711998999118805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,128,0,1,float16,float16,0,0.029530666768550873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,128,0,1,float16,fp8,0,0.02922133356332779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,128,0,1,fp8,fp8,0,0.029904000461101532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,128,0,1,float16,float16,0,0.021850667893886566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,128,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,128,0,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,128,0,1,float16,float16,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,128,0,1,float16,fp8,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,128,0,1,fp8,fp8,0,0.021221332252025604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,128,0,1,float16,float16,0,0.021082667013009388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,128,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,128,0,1,float16,float16,0,0.021530665457248688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,128,0,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,128,0,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,128,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,128,0,1,float16,float16,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,128,0,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,128,0,1,fp8,fp8,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,128,0,1,float16,float16,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,128,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,128,0,1,fp8,fp8,0,0.018101333330074947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,128,0,1,float16,fp8,0,0.017557332913080852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,128,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,128,0,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,128,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,128,0,1,float16,float16,0,0.017445333302021027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,128,0,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,128,0,1,float16,fp8,0,0.017637333522240322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,128,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,128,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,128,0,1,float16,float16,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,128,0,1,fp8,fp8,0,0.017829333742459614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,128,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,128,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,128,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,128,0,1,float16,float16,0,0.016122666498025257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,128,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,128,0,1,float16,fp8,0,0.016575999557971954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,128,0,1,float16,float16,0,0.016271999726692837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,128,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,128,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,128,0,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,128,0,1,float16,float16,0,0.015674666812022526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,128,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,128,0,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,128,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,128,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,128,0,1,float16,fp8,0,0.016597333053747814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,128,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,128,0,1,float16,float16,0,0.10552533467610677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,128,0,1,float16,fp8,0,0.10586667060852051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,128,0,1,fp8,fp8,0,0.10335999727249146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,128,0,1,float16,fp8,0,0.10595200459162395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,128,0,1,float16,float16,0,0.10610133409500122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,128,0,1,fp8,fp8,0,0.1025279959042867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,128,0,1,float16,float16,0,0.10764267047246297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,128,0,1,float16,fp8,0,0.10613333185513814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,128,0,1,fp8,fp8,0,0.10586133599281311
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,128,0,1,float16,float16,0,0.10773332913716634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,128,0,1,float16,fp8,0,0.10777599612871806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,128,0,1,fp8,fp8,0,0.1053546667098999
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,128,0,1,float16,float16,0,0.062463998794555664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,128,0,1,float16,fp8,0,0.06057066718737284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,128,0,1,fp8,fp8,0,0.06007466713587443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,128,0,1,float16,float16,0,0.06020266811052958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,128,0,1,float16,fp8,0,0.060085331400235496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,128,0,1,fp8,fp8,0,0.058287998040517174
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,128,0,1,float16,float16,0,0.06161599854628245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,128,0,1,float16,fp8,0,0.060266668597857155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,128,0,1,fp8,fp8,0,0.0583840012550354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,128,0,1,float16,float16,0,0.06206933160622915
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,128,0,1,float16,fp8,0,0.060346667965253196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,128,0,1,fp8,fp8,0,0.06214933097362518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,128,0,1,float16,float16,0,0.061008001367251076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,128,0,1,float16,fp8,0,0.062309334675470986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,128,0,1,fp8,fp8,0,0.062352001667022705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,128,0,1,float16,float16,0,0.03746666759252548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,128,0,1,float16,fp8,0,0.03782399992148081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,128,0,1,fp8,fp8,0,0.037605332831541695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,128,0,1,float16,float16,0,0.037658666570981346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,128,0,1,float16,fp8,0,0.03755733370780945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,128,0,1,fp8,fp8,0,0.03578133384386698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,128,0,1,float16,float16,0,0.037632000943024956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,128,0,1,float16,fp8,0,0.03760000069936117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,128,0,1,fp8,fp8,0,0.03737066686153412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,128,0,1,float16,float16,0,0.03774933268626531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,128,0,1,float16,fp8,0,0.03732266773780187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,128,0,1,fp8,fp8,0,0.037615999579429626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,128,0,1,float16,float16,0,0.03754133234421412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,128,0,1,float16,fp8,0,0.03761066744724909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,128,0,1,fp8,fp8,0,0.037647999823093414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,128,0,1,float16,fp8,0,0.02722666660944621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,128,0,1,float16,float16,0,0.02548266698916753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,128,0,1,fp8,fp8,0,0.02716800073782603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,128,0,1,float16,float16,0,0.025253333151340485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,128,0,1,float16,fp8,0,0.02569066733121872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,128,0,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,128,0,1,float16,float16,0,0.025248001019159954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,128,0,1,float16,fp8,0,0.02535466601451238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,128,0,1,fp8,fp8,0,0.02550933261712392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,128,0,1,float16,fp8,0,0.025221332907676697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,128,0,1,float16,float16,0,0.025402667621771496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,128,0,1,fp8,fp8,0,0.02516799916823705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,128,0,1,float16,float16,0,0.025226667523384094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,128,0,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,128,0,1,fp8,fp8,0,0.02536533276240031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,128,0,1,float16,float16,0,0.020047999918460846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,128,0,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,128,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,128,0,1,float16,float16,0,0.020853333175182343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,128,0,1,fp8,fp8,0,0.020288000504175823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,128,0,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,128,0,1,float16,float16,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,128,0,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,128,0,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,128,0,1,float16,float16,0,0.0199946661790212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,128,0,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,128,0,1,fp8,fp8,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,128,0,1,float16,float16,0,0.02011200040578842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,128,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,128,0,1,fp8,fp8,0,0.019493332753578823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,128,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,128,0,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,128,0,1,float16,float16,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,128,0,1,float16,float16,0,0.01748266691962878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,128,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,128,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,128,0,1,float16,fp8,0,0.017658667018016178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,128,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,128,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,128,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,128,0,1,float16,float16,0,0.01552533358335495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,128,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,128,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,128,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,128,0,1,fp8,fp8,0,0.015791999797026317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,128,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,128,0,1,fp8,fp8,0,0.015594666202863058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,128,0,1,float16,float16,0,0.01562133307258288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,128,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,128,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,128,0,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,128,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,128,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,128,0,1,float16,float16,0,0.015504000087579092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,128,0,1,fp8,fp8,0,0.015722667177518208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,128,0,1,float16,float16,0,0.015418666104475657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,128,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,128,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,128,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,128,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,128,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,128,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,128,0,1,float16,fp8,0,0.016688000410795212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,128,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,128,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,0,0.09005866448084514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,0,0.08953600128491719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,128,0,1,fp8,fp8,0,0.08583999673525493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,0,0.09077866872151692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,0,0.08943466345469157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,128,0,1,fp8,fp8,0,0.08502399921417236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,0,0.08944533268610637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,128,0,1,fp8,fp8,0,0.08488532900810242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,0,0.08971200386683147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,0,0.09109333157539368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,0,0.09154666463534038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,128,0,1,fp8,fp8,0,0.08707200487454732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,0,0.0521066685517629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,0,0.0521919975678126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,128,0,1,fp8,fp8,0,0.04994133114814758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,0,0.051685333251953125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,0,0.05379199981689453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,128,0,1,fp8,fp8,0,0.05042133231957754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,0,0.05177066723505656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,0,0.05197866757710775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,128,0,1,fp8,fp8,0,0.05009066561857859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,0,0.05198933184146881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,0,0.05175999800364176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,128,0,1,fp8,fp8,0,0.04947733382383982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,0,0.05188799897829691
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,128,0,1,fp8,fp8,0,0.0498879998922348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,0,0.052389333645502724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,0,0.03331200033426285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,0,0.03332799921433131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,128,0,1,fp8,fp8,0,0.03363733241955439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,0,0.03385599950949351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,0,0.03346133232116699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,128,0,1,fp8,fp8,0,0.03366933266321818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,0,0.033589333295822144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,0,0.03341866781314214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,128,0,1,fp8,fp8,0,0.033600000043710075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,0,0.0342399999499321
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,128,0,1,fp8,fp8,0,0.033743999898433685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,0,0.034586665530999504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,0,0.03349333256483078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,128,0,1,fp8,fp8,0,0.03169599920511246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,0,0.033359999457995095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,0,0.025834667185942333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,0,0.02447466552257538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,0,0.02510400116443634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,128,0,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,0,0.025914666553338368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,128,0,1,fp8,fp8,0,0.023445333043734234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,0,0.025216000775496166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,0,0.02537599951028824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,128,0,1,fp8,fp8,0,0.023589332898457844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,0,0.025744001070658367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,0,0.025098666548728943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,128,0,1,fp8,fp8,0,0.0234400009115537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,0,0.02409599969784419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,0,0.02521066615978877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,128,0,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,0,0.022015998760859173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,0,0.02145066608985265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,128,0,1,fp8,fp8,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,0,0.022543999056021374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,0,0.021536000072956085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,128,0,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,0,0.02202133337656657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,128,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,0,0.02310933421055476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,0,0.024112001061439514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,128,0,1,fp8,fp8,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,0,0.01933866615096728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,128,0,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,128,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,128,0,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,0,0.019120000302791595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,128,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,128,0,1,fp8,fp8,0,0.018640000373125076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,0,0.017616000026464462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,128,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,0,0.017594666530688603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,0,0.016613333175579708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,0,0.016789333273967106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,128,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,128,0,1,fp8,fp8,0,0.01590399940808614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,0,0.015392000476519266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,128,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,0,0.015354666858911514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,0,0.01651200031240781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,128,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,0,0.015439999600251516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,128,0,1,fp8,fp8,0,4.331653277079265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,128,0,1,float16,float16,0,7.4319197336832685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,128,0,1,float16,fp8,0,7.683909098307292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,128,0,1,float16,float16,0,7.706799825032552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,128,0,1,float16,fp8,0,7.285541534423828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,128,0,1,fp8,fp8,0,4.712938626607259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,128,0,1,float16,float16,0,7.903301239013672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,128,0,1,float16,float16,0,3.995487848917643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,128,0,1,fp8,fp8,0,4.453541437784831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,128,0,1,float16,fp8,0,8.007770538330078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,128,0,1,float16,fp8,0,3.596005439758301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,128,0,1,fp8,fp8,0,2.3194665908813477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,128,0,1,float16,float16,0,3.0355733235677085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,128,0,1,float16,fp8,0,3.666597366333008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,128,0,1,fp8,fp8,0,2.261552015940348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,128,0,1,float16,float16,0,3.645733197530111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,128,0,1,fp8,fp8,0,2.369765281677246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,128,0,1,float16,fp8,0,4.204677263895671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,128,0,1,float16,float16,0,4.055343945821126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,128,0,1,fp8,fp8,0,2.2883520126342773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,128,0,1,float16,fp8,0,4.431056022644043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,128,0,1,float16,fp8,0,1.8099039395650227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,128,0,1,float16,float16,0,1.6921547253926594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,128,0,1,fp8,fp8,0,1.2225173314412434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,128,0,1,float16,float16,0,1.368303934733073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,128,0,1,fp8,fp8,0,1.1858346462249756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,128,0,1,float16,fp8,0,1.8064266840616863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,128,0,1,float16,float16,0,1.3877013524373372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,128,0,1,float16,fp8,0,1.3912053108215332
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,128,0,1,fp8,fp8,0,1.1937386989593506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,128,0,1,float16,float16,0,1.8502133687337239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,128,0,1,float16,fp8,0,1.4013439814249675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,128,0,1,fp8,fp8,0,1.1968906720479329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,128,0,1,float16,float16,0,0.7994240125020345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,128,0,1,fp8,fp8,0,0.7088373502095541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,128,0,1,float16,fp8,0,0.9169387022654215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,128,0,1,float16,float16,0,0.7819200356801351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,128,0,1,float16,fp8,0,0.7828906377156576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,128,0,1,fp8,fp8,0,0.6895039876302084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,128,0,1,float16,float16,0,0.7815999984741211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,128,0,1,fp8,fp8,0,0.6895146369934082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,128,0,1,float16,fp8,0,0.7860373655954996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,128,0,1,float16,float16,0,0.7943413257598877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,128,0,1,fp8,fp8,0,0.6917706330617269
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,128,0,1,float16,fp8,0,0.7898293336232504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,128,0,1,float16,float16,0,3.9913813273111978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,128,0,1,fp8,fp8,0,2.6343520482381186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,128,0,1,float16,fp8,0,4.594581286112468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,128,0,1,float16,float16,0,4.654453277587891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,128,0,1,fp8,fp8,0,2.646970589955648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,128,0,1,float16,fp8,0,4.116010665893555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,128,0,1,float16,float16,0,4.692341486612956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,128,0,1,float16,fp8,0,4.737519900004069
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,128,0,1,fp8,fp8,0,2.603072007497152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,128,0,1,float16,float16,0,1.5980693499247234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,128,0,1,float16,fp8,0,1.7362346649169922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,128,0,1,fp8,fp8,0,1.3893866539001465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,128,0,1,float16,float16,0,1.7934187253316243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,128,0,1,float16,fp8,0,1.9201547304789226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,128,0,1,fp8,fp8,0,1.3380907376607258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,128,0,1,float16,float16,0,1.9732160568237305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,128,0,1,float16,fp8,0,1.5664374033610027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,128,0,1,fp8,fp8,0,1.3410719235738118
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,128,0,1,float16,float16,0,1.832533359527588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,128,0,1,float16,float16,0,0.8719627062479655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,128,0,1,fp8,fp8,0,1.347061316172282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,128,0,1,float16,fp8,0,0.892410675684611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,128,0,1,float16,fp8,0,2.3735574086507163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,128,0,1,fp8,fp8,0,0.7703626950581869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,128,0,1,float16,float16,0,0.8497172991434733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,128,0,1,float16,fp8,0,0.8682080109914144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,128,0,1,fp8,fp8,0,1.1481386820475261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,128,0,1,float16,float16,0,0.8491093317667643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,128,0,1,float16,fp8,0,0.9738773504892985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,128,0,1,fp8,fp8,0,0.8686613241831461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,128,0,1,float16,float16,0,0.8593706289927164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,128,0,1,float16,fp8,0,0.8600426514943441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,128,0,1,float16,float16,0,0.5209226608276367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,128,0,1,fp8,fp8,0,0.7817920049031576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,128,0,1,float16,fp8,0,0.5197279850641886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,128,0,1,fp8,fp8,0,0.46147199471791583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,128,0,1,float16,float16,0,0.5010986725489298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,128,0,1,float16,fp8,0,0.5000053246816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,128,0,1,fp8,fp8,0,0.4403093258539836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,128,0,1,float16,float16,0,0.5007413228352865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,128,0,1,float16,fp8,0,0.5054666598637899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,128,0,1,fp8,fp8,0,0.44572265942891437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,128,0,1,float16,float16,0,0.5028160015741984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,128,0,1,float16,fp8,0,0.5053280194600424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,128,0,1,fp8,fp8,0,0.4523306687672933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,128,0,1,float16,float16,0,2.6352052688598633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,128,0,1,fp8,fp8,0,1.8507626851399739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,128,0,1,float16,fp8,0,2.2397120793660483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,128,0,1,float16,float16,0,2.1745920181274414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,128,0,1,fp8,fp8,0,1.8577973047892253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,128,0,1,float16,fp8,0,2.9930505752563477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,128,0,1,float16,fp8,0,2.937823931376139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,128,0,1,float16,float16,0,3.1300532023111978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,128,0,1,fp8,fp8,0,1.874959945678711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,128,0,1,float16,float16,0,1.1859947045644124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,128,0,1,float16,fp8,0,1.3454880714416504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,128,0,1,fp8,fp8,0,1.0321280161539714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,128,0,1,float16,float16,0,1.1380480130513508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,128,0,1,float16,fp8,0,1.1437653700510662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,128,0,1,fp8,fp8,0,0.9874880313873291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,128,0,1,float16,float16,0,1.1420640150705974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,128,0,1,float16,fp8,0,1.2478826840718586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,128,0,1,fp8,fp8,0,0.9934933185577393
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,128,0,1,float16,float16,0,1.1569546858469646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,128,0,1,float16,fp8,0,1.163589318593343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,128,0,1,float16,float16,0,0.6599839925765991
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,128,0,1,float16,fp8,0,0.668224016825358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,128,0,1,fp8,fp8,0,1.007813294728597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,128,0,1,fp8,fp8,0,0.5796533425649008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,128,0,1,float16,float16,0,0.6311359802881876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,128,0,1,float16,fp8,0,0.6380266745885214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,128,0,1,fp8,fp8,0,0.5544319947560629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,128,0,1,float16,float16,0,0.6358453432718912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,128,0,1,float16,fp8,0,0.6719253063201904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,128,0,1,fp8,fp8,0,0.55731733640035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,128,0,1,float16,float16,0,0.6419093211491903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,128,0,1,float16,fp8,0,0.6512053410212199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,128,0,1,fp8,fp8,0,0.5632906754811605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,128,0,1,float16,float16,0,0.3949493169784546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,128,0,1,float16,fp8,0,0.40507733821868896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,128,0,1,fp8,fp8,0,0.35442133744557697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,128,0,1,float16,float16,0,0.3797760009765625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,128,0,1,float16,fp8,0,0.3821706771850586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,128,0,1,fp8,fp8,0,0.3386133511861165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,128,0,1,float16,float16,0,0.3819520076115926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,128,0,1,float16,fp8,0,0.38256001472473145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,128,0,1,fp8,fp8,0,0.3381439844767253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,128,0,1,float16,float16,0,0.3838026523590088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,128,0,1,float16,fp8,0,0.38778666655222577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,128,0,1,fp8,fp8,0,0.34381866455078125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,128,0,1,fp8,fp8,0,2.5002880096435547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,128,0,1,float16,float16,0,3.735328038533529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,128,0,1,float16,fp8,0,4.357194582621257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,128,0,1,float16,float16,0,3.019866625467936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,128,0,1,float16,fp8,0,4.249391873677571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,128,0,1,fp8,fp8,0,2.5039893786112466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,128,0,1,float16,float16,0,4.355146725972493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,128,0,1,float16,float16,0,1.5344160397847493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,128,0,1,float16,fp8,0,3.82150936126709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,128,0,1,fp8,fp8,0,2.5697333017985025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,128,0,1,fp8,fp8,0,1.3495945930480957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,128,0,1,float16,fp8,0,2.1913280487060547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,128,0,1,fp8,fp8,0,1.2734239896138508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,128,0,1,float16,fp8,0,1.4672106107076008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,128,0,1,float16,float16,0,1.4682772954305012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,128,0,1,float16,float16,0,1.4778560002644856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,128,0,1,fp8,fp8,0,1.2792479991912842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,128,0,1,float16,fp8,0,1.4777065912882488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,128,0,1,float16,float16,0,1.4780160586039226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,128,0,1,float16,fp8,0,1.5367733637491863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,128,0,1,float16,fp8,0,0.8299679756164551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,128,0,1,float16,float16,0,0.8388160069783529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,128,0,1,fp8,fp8,0,1.2919360001881917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,128,0,1,fp8,fp8,0,0.7319200038909912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,128,0,1,float16,fp8,0,0.8004480202992758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,128,0,1,float16,float16,0,0.7903680006663004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,128,0,1,fp8,fp8,0,0.6888426939646403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,128,0,1,float16,float16,0,0.805728038152059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,128,0,1,fp8,fp8,0,0.6946612993876139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,128,0,1,float16,fp8,0,0.8066720167795817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,128,0,1,float16,float16,0,0.7964800198872884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,128,0,1,float16,float16,0,0.47892268498738605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,128,0,1,fp8,fp8,0,0.7006186644236246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,128,0,1,float16,fp8,0,0.8150080045064291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,128,0,1,float16,fp8,0,0.6615840196609497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,128,0,1,fp8,fp8,0,0.42149333159128827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,128,0,1,float16,float16,0,0.4474133253097534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,128,0,1,float16,fp8,0,0.4503946701685588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,128,0,1,fp8,fp8,0,0.39565332730611164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,128,0,1,float16,float16,0,0.4491893450419108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,128,0,1,float16,fp8,0,0.4508959849675496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,128,0,1,fp8,fp8,0,0.4009386698404948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,128,0,1,float16,float16,0,0.45360533396402997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,128,0,1,float16,float16,0,0.2922240098317464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,128,0,1,float16,fp8,0,0.4557653268178304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,128,0,1,float16,fp8,0,0.2862559954325358
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,128,0,1,fp8,fp8,0,0.40573867162068683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,128,0,1,fp8,fp8,0,0.2607733408610026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,128,0,1,float16,float16,0,0.2738879919052124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,128,0,1,fp8,fp8,0,0.25202133258183795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,128,0,1,float16,fp8,0,0.27354133129119873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,128,0,1,float16,float16,0,0.27402132749557495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,128,0,1,float16,fp8,0,0.274453341960907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,128,0,1,fp8,fp8,0,0.25150932868321735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,128,0,1,float16,float16,0,0.2757866581281026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,128,0,1,float16,fp8,0,0.28013867139816284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,128,0,1,fp8,fp8,0,0.25120000044504803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,128,0,1,float16,float16,0,1.750341256459554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,128,0,1,float16,fp8,0,1.751413345336914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,128,0,1,fp8,fp8,0,1.5306347211201985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,128,0,1,float16,float16,0,1.862885316212972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,128,0,1,float16,fp8,0,1.7629920641581218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,128,0,1,float16,float16,0,1.9433600107828777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,128,0,1,fp8,fp8,0,1.5346345901489258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,128,0,1,float16,fp8,0,2.1141600608825684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,128,0,1,float16,fp8,0,0.9919359683990479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,128,0,1,float16,float16,0,1.2469600041707356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,128,0,1,fp8,fp8,0,0.8650293350219727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,128,0,1,fp8,fp8,0,1.5607360204060872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,128,0,1,float16,float16,0,0.9504799842834473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,128,0,1,fp8,fp8,0,0.8037760257720947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,128,0,1,float16,fp8,0,0.9690986474355062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,128,0,1,float16,float16,0,0.9301439921061198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,128,0,1,float16,fp8,0,0.9286719957987467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,128,0,1,fp8,fp8,0,0.8087999820709229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,128,0,1,float16,float16,0,1.0098559856414795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,128,0,1,float16,fp8,0,0.938810666402181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,128,0,1,float16,float16,0,0.5293706655502319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,128,0,1,fp8,fp8,0,1.0195626417795818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,128,0,1,float16,fp8,0,0.5575093428293864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,128,0,1,fp8,fp8,0,0.5011039972305298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,128,0,1,float16,float16,0,0.5029120047887167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,128,0,1,float16,fp8,0,0.5240159829457601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,128,0,1,fp8,fp8,0,0.44385067621866864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,128,0,1,float16,float16,0,0.5056159893671671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,128,0,1,fp8,fp8,0,0.44969598452250165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,128,0,1,float16,fp8,0,0.5285439888636271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,128,0,1,float16,float16,0,0.5102133353551229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,128,0,1,float16,fp8,0,0.5169440110524496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,128,0,1,fp8,fp8,0,0.4572266737620036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,128,0,1,float16,float16,0,0.30905065933863324
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,128,0,1,float16,fp8,0,0.3132213354110718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,128,0,1,fp8,fp8,0,0.2797600030899048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,128,0,1,fp8,fp8,0,0.25838400920232135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,128,0,1,float16,float16,0,0.2895680069923401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,128,0,1,float16,fp8,0,0.29064534107844037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,128,0,1,fp8,fp8,0,0.2643946607907613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,128,0,1,float16,float16,0,0.29175466299057007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,128,0,1,float16,fp8,0,0.2919360001881917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,128,0,1,fp8,fp8,0,0.26773866017659503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,128,0,1,float16,float16,0,0.30073599020640057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,128,0,1,float16,fp8,0,0.29918400446573895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,128,0,1,fp8,fp8,0,0.18131200472513834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,128,0,1,float16,float16,0,0.19594667355219522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,128,0,1,float16,fp8,0,0.19729600350062051
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,128,0,1,float16,fp8,0,0.18986666202545166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,128,0,1,fp8,fp8,0,0.1731040080388387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,128,0,1,float16,float16,0,0.19195199012756348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,128,0,1,float16,float16,0,0.19269333283106485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,128,0,1,fp8,fp8,0,0.17228267590204874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,128,0,1,float16,fp8,0,0.19192532698313394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,128,0,1,float16,float16,0,0.19033066431681314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,128,0,1,float16,fp8,0,0.19262399276097616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,128,0,1,fp8,fp8,0,0.17454934120178223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,128,0,1,fp8,fp8,0,1.576906681060791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,128,0,1,float16,float16,0,1.8598079681396484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,128,0,1,float16,fp8,0,1.903146743774414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,128,0,1,float16,float16,0,1.849130630493164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,128,0,1,float16,fp8,0,1.8087306022644043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,128,0,1,fp8,fp8,0,1.5826346079508464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,128,0,1,float16,float16,0,1.8480960528055828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,128,0,1,float16,float16,0,0.9940480391184489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,128,0,1,float16,fp8,0,1.934757391611735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,128,0,1,float16,fp8,0,1.219061295191447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,128,0,1,fp8,fp8,0,1.6116746266682942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,128,0,1,fp8,fp8,0,0.8867626984914144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,128,0,1,float16,float16,0,0.9320639769236246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,128,0,1,fp8,fp8,0,0.8150346279144287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,128,0,1,float16,fp8,0,0.9409920374552408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,128,0,1,float16,float16,0,0.9324426651000977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,128,0,1,fp8,fp8,0,0.816426674524943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,128,0,1,float16,fp8,0,0.98853866259257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,128,0,1,float16,float16,0,0.9665653705596924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,128,0,1,float16,fp8,0,0.9488053321838379
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,128,0,1,float16,float16,0,0.5771040121714274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,128,0,1,float16,fp8,0,0.6134560108184814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,128,0,1,fp8,fp8,0,0.8302453358968099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,128,0,1,fp8,fp8,0,0.4760640064875285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,128,0,1,fp8,fp8,0,0.4402666489283244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,128,0,1,float16,fp8,0,0.5002293189366659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,128,0,1,float16,float16,0,0.5761280059814453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,128,0,1,fp8,fp8,0,0.44280532995859784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,128,0,1,float16,fp8,0,0.500981330871582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,128,0,1,float16,float16,0,0.5001440048217773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,128,0,1,float16,float16,0,0.50327467918396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,128,0,1,float16,float16,0,0.29651200771331787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,128,0,1,float16,fp8,0,0.30037333567937213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,128,0,1,fp8,fp8,0,0.4489920139312744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,128,0,1,float16,fp8,0,0.5097279946009318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,128,0,1,fp8,fp8,0,0.26959999402364093
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,128,0,1,float16,float16,0,0.27450132369995117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,128,0,1,float16,fp8,0,0.27668799956639606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,128,0,1,fp8,fp8,0,0.2502880096435547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,128,0,1,float16,float16,0,0.2765653332074483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,128,0,1,float16,fp8,0,0.2781706651051839
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,128,0,1,fp8,fp8,0,0.25082133213679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,128,0,1,float16,float16,0,0.28428266445795697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,128,0,1,float16,fp8,0,0.28330133358637494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,128,0,1,float16,float16,0,0.1786293387413025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,128,0,1,float16,fp8,0,0.17981332540512085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,128,0,1,fp8,fp8,0,0.2520586649576823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,128,0,1,fp8,fp8,0,0.16307199994723
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,128,0,1,float16,float16,0,0.16821332772572836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,128,0,1,float16,fp8,0,0.16661333044370016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,128,0,1,fp8,fp8,0,0.1485919952392578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,128,0,1,float16,float16,0,0.1641279955705007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,128,0,1,float16,fp8,0,0.17263466119766235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,128,0,1,fp8,fp8,0,0.14833066860834757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,128,0,1,float16,float16,0,0.1671733260154724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,128,0,1,float16,fp8,0,0.16922666629155478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,128,0,1,float16,float16,0,0.11646399895350139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,128,0,1,fp8,fp8,0,0.15621333320935568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,128,0,1,float16,fp8,0,0.11525332927703857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,128,0,1,fp8,fp8,0,0.10661333799362183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,128,0,1,float16,float16,0,0.11477866768836975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,128,0,1,float16,fp8,0,0.11499733726183574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,128,0,1,fp8,fp8,0,0.10307733217875163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,128,0,1,float16,fp8,0,0.1132319966952006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,128,0,1,float16,float16,0,0.11210133632024129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,128,0,1,fp8,fp8,0,0.10482133428255717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,128,0,1,float16,float16,0,0.1123306651910146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,128,0,1,float16,fp8,0,0.11334400375684102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,128,0,1,fp8,fp8,0,0.10378666718800862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,128,0,1,float16,float16,0,1.1669493516286213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,128,0,1,fp8,fp8,0,1.0249653657277424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,128,0,1,float16,fp8,0,1.1725760300954182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,128,0,1,float16,float16,0,1.2046186923980713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,128,0,1,float16,fp8,0,1.1802666982014973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,128,0,1,fp8,fp8,0,1.0332799752553303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,128,0,1,fp8,fp8,0,1.0557173093159993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,128,0,1,float16,fp8,0,1.2038666407267253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,128,0,1,float16,float16,0,1.3364267349243164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,128,0,1,float16,float16,0,0.6594719886779785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,128,0,1,float16,fp8,0,0.6688053607940674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,128,0,1,fp8,fp8,0,0.6987093289693197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,128,0,1,float16,float16,0,0.6060053507486979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,128,0,1,float16,fp8,0,0.6400533517201742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,128,0,1,fp8,fp8,0,0.5352053244908651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,128,0,1,float16,float16,0,0.6106506586074829
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,128,0,1,fp8,fp8,0,0.5379786491394043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,128,0,1,float16,fp8,0,0.6160693168640137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,128,0,1,float16,float16,0,0.6225066582361857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,128,0,1,float16,fp8,0,0.6243679920832316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,128,0,1,float16,float16,0,0.35786132017771405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,128,0,1,fp8,fp8,0,0.5500533183415731
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,128,0,1,float16,fp8,0,0.35995733737945557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,128,0,1,fp8,fp8,0,0.3208373387654622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,128,0,1,float16,float16,0,0.33101866642634076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,128,0,1,float16,fp8,0,0.33058132727940875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,128,0,1,fp8,fp8,0,0.2918293277422587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,128,0,1,fp8,fp8,0,0.2967146635055542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,128,0,1,float16,float16,0,0.329584002494812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,128,0,1,float16,fp8,0,0.33107199271519977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,128,0,1,float16,float16,0,0.2065546711285909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,128,0,1,float16,float16,0,0.33372799555460614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,128,0,1,fp8,fp8,0,0.30031466484069824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,128,0,1,float16,fp8,0,0.3410666783650716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,128,0,1,float16,fp8,0,0.20909865697224936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,128,0,1,fp8,fp8,0,0.1853440006573995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,128,0,1,float16,float16,0,0.18129066626230875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,128,0,1,float16,fp8,0,0.18376533190409342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,128,0,1,fp8,fp8,0,0.1667733391125997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,128,0,1,float16,float16,0,0.18494399388631186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,128,0,1,float16,fp8,0,0.1872746745745341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,128,0,1,fp8,fp8,0,0.1688800056775411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,128,0,1,float16,float16,0,0.19271467129389444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,128,0,1,float16,fp8,0,0.19146132469177246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,128,0,1,float16,float16,0,0.1220906674861908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,128,0,1,fp8,fp8,0,0.1741066575050354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,128,0,1,float16,fp8,0,0.12628799676895142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,128,0,1,fp8,fp8,0,0.11782399813334148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,128,0,1,float16,float16,0,0.11829866965611775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,128,0,1,float16,fp8,0,0.11737066507339478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,128,0,1,fp8,fp8,0,0.10666666428248088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,128,0,1,float16,float16,0,0.11617066462834676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,128,0,1,float16,fp8,0,0.11988266309102376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,128,0,1,fp8,fp8,0,0.10716799894968669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,128,0,1,float16,float16,0,0.11580800016721089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,128,0,1,float16,fp8,0,0.11818666259447734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,128,0,1,float16,float16,0,0.07763200004895528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,128,0,1,fp8,fp8,0,0.10884267091751099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,128,0,1,float16,fp8,0,0.07831466694672902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,128,0,1,fp8,fp8,0,0.0726506660381953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,128,0,1,float16,float16,0,0.07681599756081899
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,128,0,1,float16,fp8,0,0.0769706666469574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,128,0,1,fp8,fp8,0,0.07252266506354015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,128,0,1,float16,float16,0,0.07483733197053273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,128,0,1,float16,fp8,0,0.07709333300590515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,128,0,1,fp8,fp8,0,0.07230933507283528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,128,0,1,float16,float16,0,0.07784000039100647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,128,0,1,float16,fp8,0,0.07678933441638947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,128,0,1,fp8,fp8,0,0.07230933507283528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,128,0,1,float16,float16,0,1.2818026542663574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,128,0,1,fp8,fp8,0,1.1205066839853923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,128,0,1,float16,fp8,0,1.2835413614908855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,128,0,1,float16,float16,0,1.2892693678538005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,128,0,1,fp8,fp8,0,1.1358719666798909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,128,0,1,float16,fp8,0,1.2983307043711345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,128,0,1,float16,float16,0,1.3115306695302327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,128,0,1,float16,fp8,0,1.315226634343465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,128,0,1,float16,float16,0,0.7241760094960531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,128,0,1,float16,fp8,0,0.7400639851888021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,128,0,1,fp8,fp8,0,0.7269279956817627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,128,0,1,fp8,fp8,0,1.1633600393931072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,128,0,1,float16,float16,0,0.6647040049235026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,128,0,1,fp8,fp8,0,0.580351988474528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,128,0,1,float16,fp8,0,0.6870506604512533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,128,0,1,float16,float16,0,0.660314679145813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,128,0,1,float16,fp8,0,0.6683839956919352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,128,0,1,fp8,fp8,0,0.584768017133077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,128,0,1,float16,float16,0,0.6793013413747152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,128,0,1,float16,fp8,0,0.6792266368865967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,128,0,1,float16,float16,0,0.3804853359858195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,128,0,1,float16,fp8,0,0.39025068283081055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,128,0,1,fp8,fp8,0,0.6011093457539877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,128,0,1,fp8,fp8,0,0.34645867347717285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,128,0,1,float16,float16,0,0.3476426601409912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,128,0,1,fp8,fp8,0,0.310208002726237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,128,0,1,float16,fp8,0,0.35068265597025555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,128,0,1,float16,float16,0,0.3504106601079305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,128,0,1,float16,fp8,0,0.3512586752573649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,128,0,1,fp8,fp8,0,0.3148426612218221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,128,0,1,float16,float16,0,0.35623466968536377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,128,0,1,float16,fp8,0,0.35946667194366455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,128,0,1,float16,float16,0,0.2112213373184204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,128,0,1,fp8,fp8,0,0.31938133637110394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,128,0,1,fp8,fp8,0,0.19382933775583902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,128,0,1,float16,fp8,0,0.21530665953954062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,128,0,1,float16,float16,0,0.18904000520706177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,128,0,1,fp8,fp8,0,0.1730453372001648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,128,0,1,float16,fp8,0,0.18928533792495728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,128,0,1,float16,float16,0,0.1917919913927714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,128,0,1,fp8,fp8,0,0.17548267046610513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,128,0,1,float16,fp8,0,0.19008533159891763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,128,0,1,float16,float16,0,0.19662932554880777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,128,0,1,float16,float16,0,0.12405866384506226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,128,0,1,float16,fp8,0,0.20031466086705527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,128,0,1,fp8,fp8,0,0.17750932772954306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,128,0,1,float16,fp8,0,0.12662933270136514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,128,0,1,fp8,fp8,0,0.11569600303967793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,128,0,1,float16,float16,0,0.10994133353233337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,128,0,1,fp8,fp8,0,0.09957333405812581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,128,0,1,float16,fp8,0,0.11317867040634155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,128,0,1,float16,float16,0,0.11203199625015259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,128,0,1,float16,fp8,0,0.11203199625015259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,128,0,1,fp8,fp8,0,0.10080533226331075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,128,0,1,float16,float16,0,0.11251200238863628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,128,0,1,float16,float16,0,0.0772213339805603
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,128,0,1,float16,fp8,0,0.11360533038775127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,128,0,1,fp8,fp8,0,0.10289067029953003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,128,0,1,float16,fp8,0,0.07893333335717519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,128,0,1,fp8,fp8,0,0.07399466633796692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,128,0,1,float16,float16,0,0.07445333401362102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,128,0,1,float16,fp8,0,0.07461866736412048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,128,0,1,fp8,fp8,0,0.06880000233650208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,128,0,1,float16,float16,0,0.07633600135644276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,128,0,1,float16,fp8,0,0.07494399944941203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,128,0,1,fp8,fp8,0,0.06833066542943318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,128,0,1,float16,float16,0,0.07691733539104462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,128,0,1,float16,fp8,0,0.07656533519426982
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,128,0,1,float16,float16,0,0.05801600217819214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,128,0,1,fp8,fp8,0,0.06880000233650208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,128,0,1,float16,fp8,0,0.05819199979305267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,128,0,1,fp8,fp8,0,0.05401599903901418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,128,0,1,float16,float16,0,0.05603733162085215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,128,0,1,float16,fp8,0,0.05653866628805796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,128,0,1,fp8,fp8,0,0.05203733344872793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,128,0,1,float16,float16,0,0.05637866755326589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,128,0,1,float16,fp8,0,0.05610666672388712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,128,0,1,fp8,fp8,0,0.05216533442338308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,128,0,1,float16,float16,0,0.056128000219662987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,128,0,1,float16,fp8,0,0.05634133517742157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,128,0,1,fp8,fp8,0,0.05203199883302053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,128,0,1,float16,float16,0,0.9191306432088217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,128,0,1,fp8,fp8,0,0.8037493228912354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,128,0,1,float16,fp8,0,0.9144480228424072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,128,0,1,float16,float16,0,0.9286026954650879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,128,0,1,fp8,fp8,0,0.8101867039998373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,128,0,1,float16,fp8,0,0.928704023361206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,128,0,1,float16,float16,0,0.9384693304697672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,128,0,1,float16,float16,0,0.5220746596654257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,128,0,1,fp8,fp8,0,0.8221440315246582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,128,0,1,float16,fp8,0,0.9376959800720215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,128,0,1,fp8,fp8,0,0.468176007270813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,128,0,1,float16,fp8,0,0.5231360197067261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,128,0,1,float16,float16,0,0.45974934101104736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,128,0,1,float16,fp8,0,0.46593066056569415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,128,0,1,fp8,fp8,0,0.4101066589355469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,128,0,1,float16,float16,0,0.46614933013916016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,128,0,1,fp8,fp8,0,0.41170668601989746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,128,0,1,float16,fp8,0,0.466810663541158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,128,0,1,float16,float16,0,0.4774080117543538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,128,0,1,float16,float16,0,0.27164800961812335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,128,0,1,fp8,fp8,0,0.4230133295059204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,128,0,1,float16,fp8,0,0.4789973497390747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,128,0,1,float16,fp8,0,0.27879999081293744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,128,0,1,fp8,fp8,0,0.24944533904393515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,128,0,1,float16,float16,0,0.242576003074646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,128,0,1,float16,fp8,0,0.2441920042037964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,128,0,1,fp8,fp8,0,0.22068800528844199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,128,0,1,float16,float16,0,0.24490133921305338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,128,0,1,float16,fp8,0,0.2463093400001526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,128,0,1,fp8,fp8,0,0.22196267048517862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,128,0,1,float16,float16,0,0.2526826659838359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,128,0,1,float16,fp8,0,0.25331199169158936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,128,0,1,float16,float16,0,0.1515733301639557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,128,0,1,fp8,fp8,0,0.2278560002644857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,128,0,1,float16,fp8,0,0.152730663617452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,128,0,1,fp8,fp8,0,0.1402720014254252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,128,0,1,float16,fp8,0,0.13091199596722922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,128,0,1,float16,float16,0,0.13060800234476724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,128,0,1,fp8,fp8,0,0.11703466375668843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,128,0,1,float16,float16,0,0.1307199994723002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,128,0,1,float16,fp8,0,0.13285866379737854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,128,0,1,fp8,fp8,0,0.12272533774375916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,128,0,1,float16,float16,0,0.1365226705869039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,128,0,1,float16,fp8,0,0.1388213336467743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,128,0,1,fp8,fp8,0,0.1281706690788269
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,128,0,1,float16,float16,0,0.08773333827654521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,128,0,1,float16,fp8,0,0.08800533413887024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,128,0,1,fp8,fp8,0,0.08570667107899983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,128,0,1,float16,float16,0,0.0813920001188914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,128,0,1,float16,fp8,0,0.08106133341789246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,128,0,1,fp8,fp8,0,0.07298133273919423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,128,0,1,float16,float16,0,0.08115200201670329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,128,0,1,float16,fp8,0,0.0812906672557195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,128,0,1,fp8,fp8,0,0.07298666735490163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,128,0,1,float16,float16,0,0.08186666667461395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,128,0,1,float16,fp8,0,0.0831520011027654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,128,0,1,fp8,fp8,0,0.07514133552710216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,128,0,1,float16,float16,0,0.05284800132115682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,128,0,1,float16,fp8,0,0.05403199791908264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,128,0,1,fp8,fp8,0,0.05209066470464071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,128,0,1,float16,float16,0,0.052229334910710655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,128,0,1,fp8,fp8,0,0.047797332207361855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,128,0,1,float16,fp8,0,0.05227733155091604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,128,0,1,float16,float16,0,0.05247466762860616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,128,0,1,float16,fp8,0,0.05171200136343638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,128,0,1,fp8,fp8,0,0.04805333415667216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,128,0,1,float16,float16,0,0.05316799879074097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,128,0,1,float16,fp8,0,0.05203199883302053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,128,0,1,fp8,fp8,0,0.04823466638724009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,128,0,1,float16,float16,0,0.04641066491603851
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,128,0,1,float16,fp8,0,0.04796266555786133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,128,0,1,fp8,fp8,0,0.043893332282702126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,128,0,1,float16,float16,0,0.0459146648645401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,128,0,1,float16,fp8,0,0.04789866507053375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,128,0,1,fp8,fp8,0,0.04381866753101349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,128,0,1,float16,float16,0,0.046154667933781944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,128,0,1,float16,fp8,0,0.04765866696834564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,128,0,1,fp8,fp8,0,0.04390933116277059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,128,0,1,float16,float16,0,0.04577066500981649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,128,0,1,float16,fp8,0,0.04756266872088114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,128,0,1,fp8,fp8,0,0.04394666850566864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,128,0,1,float16,float16,0,0.912384033203125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,128,0,1,fp8,fp8,0,0.8749439716339111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,128,0,1,float16,fp8,0,0.9096906979878744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,128,0,1,fp8,fp8,0,0.8941760063171387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,128,0,1,float16,float16,0,0.9365386962890625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,128,0,1,float16,fp8,0,0.9324906667073568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,128,0,1,float16,float16,0,0.9494986534118652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,128,0,1,fp8,fp8,0,0.924623966217041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,128,0,1,float16,fp8,0,0.9453813234965006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,128,0,1,float16,fp8,0,0.5275839964548746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,128,0,1,float16,float16,0,0.5358026822408041
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,128,0,1,fp8,fp8,0,0.5194666783014933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,128,0,1,fp8,fp8,0,0.45051201184590656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,128,0,1,float16,fp8,0,0.46980265776316327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,128,0,1,float16,float16,0,0.4817119836807251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,128,0,1,float16,float16,0,0.4702346722284953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,128,0,1,fp8,fp8,0,0.46056532859802246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,128,0,1,float16,fp8,0,0.4809759855270386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,128,0,1,float16,float16,0,0.49137067794799805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,128,0,1,float16,fp8,0,0.48741332689921063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,128,0,1,float16,float16,0,0.28516266743342084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,128,0,1,float16,fp8,0,0.28090133269627887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,128,0,1,fp8,fp8,0,0.4695626497268677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,128,0,1,fp8,fp8,0,0.271722674369812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,128,0,1,float16,float16,0,0.2448106606801351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,128,0,1,float16,fp8,0,0.24529600143432617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,128,0,1,fp8,fp8,0,0.23602133989334106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,128,0,1,float16,float16,0,0.25226134061813354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,128,0,1,float16,fp8,0,0.25171200434366864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,128,0,1,fp8,fp8,0,0.24565333127975464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,128,0,1,float16,float16,0,0.25705599784851074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,128,0,1,float16,fp8,0,0.25667200485865277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,128,0,1,float16,float16,0,0.1570026675860087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,128,0,1,fp8,fp8,0,0.24552534023920694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,128,0,1,float16,fp8,0,0.1534933348496755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,128,0,1,float16,float16,0,0.13412800431251526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,128,0,1,fp8,fp8,0,0.1492533286412557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,128,0,1,float16,fp8,0,0.13401599725087485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,128,0,1,fp8,fp8,0,0.1253546675046285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,128,0,1,float16,float16,0,0.13758933544158936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,128,0,1,float16,fp8,0,0.13769066333770752
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,128,0,1,fp8,fp8,0,0.1332319974899292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,128,0,1,float16,float16,0,0.14164800445238748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,128,0,1,float16,fp8,0,0.1410719950993856
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,128,0,1,float16,float16,0,0.08981866637865703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,128,0,1,float16,fp8,0,0.08798399567604065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,128,0,1,fp8,fp8,0,0.13506666819254556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,128,0,1,fp8,fp8,0,0.08789333701133728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,128,0,1,float16,fp8,0,0.07878933350245158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,128,0,1,float16,float16,0,0.07835733393828075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,128,0,1,fp8,fp8,0,0.07088533540566762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,128,0,1,float16,float16,0,0.08032000064849854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,128,0,1,fp8,fp8,0,0.07187733550866444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,128,0,1,float16,fp8,0,0.07884799937407176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,128,0,1,float16,float16,0,0.07918400069077809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,128,0,1,float16,fp8,0,0.08018666505813599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,128,0,1,float16,float16,0,0.05438933273156484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,128,0,1,fp8,fp8,0,0.07428266604741414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,128,0,1,float16,fp8,0,0.05421866476535797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,128,0,1,fp8,fp8,0,0.05165866514046987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,128,0,1,float16,float16,0,0.052330667773882546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,128,0,1,float16,fp8,0,0.052095999320348106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,128,0,1,fp8,fp8,0,0.0476800004641215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,128,0,1,float16,float16,0,0.05198933184146881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,128,0,1,float16,fp8,0,0.0521066685517629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,128,0,1,fp8,fp8,0,0.04814399778842926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,128,0,1,float16,float16,0,0.05317866802215576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,128,0,1,float16,fp8,0,0.05227200190226237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,128,0,1,float16,float16,0,0.0396373321612676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,128,0,1,fp8,fp8,0,0.04975999891757965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,128,0,1,float16,fp8,0,0.03786666691303253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,128,0,1,float16,float16,0,0.03755199909210205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,128,0,1,fp8,fp8,0,0.03735466549793879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,128,0,1,float16,fp8,0,0.03758399933576584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,128,0,1,fp8,fp8,0,0.0359253336985906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,128,0,1,float16,float16,0,0.0379573330283165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,128,0,1,float16,fp8,0,0.03783999880154928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,128,0,1,fp8,fp8,0,0.035114665826161705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,128,0,1,float16,float16,0,0.03812266637881597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,128,0,1,float16,fp8,0,0.03793599953254064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,128,0,1,fp8,fp8,0,0.03551466763019562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,128,0,1,float16,float16,0,0.03465066601832708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,128,0,1,float16,fp8,0,0.0354666660229365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,128,0,1,fp8,fp8,0,0.03350399931271871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,128,0,1,float16,float16,0,0.03374933451414108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,128,0,1,float16,fp8,0,0.035589332381884255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,128,0,1,fp8,fp8,0,0.031632001201311745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,128,0,1,float16,float16,0,0.034832000732421875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,128,0,1,float16,fp8,0,0.03364266703526179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,128,0,1,fp8,fp8,0,0.03147733211517334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,128,0,1,float16,float16,0,0.035536001125971474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,128,0,1,float16,fp8,0,0.03514666606982549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,128,0,1,fp8,fp8,0,0.031557333966096245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,128,0,1,float16,fp8,0,0.7754506270090739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,128,0,1,float16,float16,0,0.7774453163146973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,128,0,1,fp8,fp8,0,0.7566026846567789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,128,0,1,float16,float16,0,0.7969760100046793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,128,0,1,float16,fp8,0,0.7912586530049642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,128,0,1,fp8,fp8,0,0.7736746470133463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,128,0,1,float16,float16,0,0.8177813688913981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,128,0,1,float16,fp8,0,0.8097226619720459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,128,0,1,fp8,fp8,0,0.7995893160502116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,128,0,1,float16,float16,0,0.4656533400217692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,128,0,1,fp8,fp8,0,0.4569600025812785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,128,0,1,float16,fp8,0,0.45999467372894287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,128,0,1,float16,float16,0,0.399402658144633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,128,0,1,float16,fp8,0,0.3983093500137329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,128,0,1,fp8,fp8,0,0.3891520102818807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,128,0,1,float16,fp8,0,0.4081546862920125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,128,0,1,fp8,fp8,0,0.3988639911015828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,128,0,1,float16,float16,0,0.42016534010569256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,128,0,1,float16,float16,0,0.41120000680287677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,128,0,1,float16,float16,0,0.24975999196370444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,128,0,1,float16,fp8,0,0.24252265691757202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,128,0,1,float16,fp8,0,0.4140479962031047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,128,0,1,fp8,fp8,0,0.4075946807861328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,128,0,1,fp8,fp8,0,0.24013332525889078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,128,0,1,float16,float16,0,0.21150400241216025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,128,0,1,float16,fp8,0,0.20810665686925253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,128,0,1,fp8,fp8,0,0.20025600989659628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,128,0,1,float16,float16,0,0.21793067455291748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,128,0,1,float16,fp8,0,0.21729065974553427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,128,0,1,fp8,fp8,0,0.21040532986323038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,128,0,1,float16,float16,0,0.21949867407480875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,128,0,1,float16,fp8,0,0.22065067291259766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,128,0,1,float16,float16,0,0.1351093351840973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,128,0,1,float16,fp8,0,0.13056000073750815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,128,0,1,fp8,fp8,0,0.2145813306172689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,128,0,1,fp8,fp8,0,0.12973333398501077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,128,0,1,float16,float16,0,0.1122826635837555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,128,0,1,float16,fp8,0,0.1125333309173584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,128,0,1,fp8,fp8,0,0.10605333248774211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,128,0,1,float16,float16,0,0.11748799681663513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,128,0,1,float16,fp8,0,0.11668800314267476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,128,0,1,fp8,fp8,0,0.11359999577204387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,128,0,1,float16,float16,0,0.1204746663570404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,128,0,1,float16,fp8,0,0.11859732866287231
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,128,0,1,float16,float16,0,0.07710933188597362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,128,0,1,fp8,fp8,0,0.11563199758529663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,128,0,1,float16,fp8,0,0.07533866663773854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,128,0,1,fp8,fp8,0,0.07674666742483775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,128,0,1,float16,float16,0,0.06701333324114482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,128,0,1,float16,fp8,0,0.06628799935181935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,128,0,1,fp8,fp8,0,0.06238933404286703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,128,0,1,float16,float16,0,0.06870933373769124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,128,0,1,float16,fp8,0,0.0668746680021286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,128,0,1,fp8,fp8,0,0.06234133243560791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,128,0,1,float16,float16,0,0.0702453354994456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,128,0,1,float16,fp8,0,0.06862933437029521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,128,0,1,fp8,fp8,0,0.06422933439413707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,128,0,1,float16,float16,0,0.04756799836953481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,128,0,1,float16,fp8,0,0.04717866579691569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,128,0,1,fp8,fp8,0,0.04379733403523763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,128,0,1,float16,float16,0,0.046021332343419395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,128,0,1,float16,fp8,0,0.0459146648645401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,128,0,1,fp8,fp8,0,0.04037333279848099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,128,0,1,float16,float16,0,0.04572799801826477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,128,0,1,float16,fp8,0,0.0461760014295578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,128,0,1,fp8,fp8,0,0.04197333256403605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,128,0,1,float16,float16,0,0.04544533292452494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,128,0,1,float16,fp8,0,0.04567466676235199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,128,0,1,fp8,fp8,0,0.04124800115823746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,128,0,1,float16,float16,0,0.0332640012105306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,128,0,1,float16,fp8,0,0.033146666983763375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,128,0,1,fp8,fp8,0,0.03148799886306127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,128,0,1,float16,float16,0,0.03127466638882955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,128,0,1,float16,fp8,0,0.031317333380381264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,128,0,1,fp8,fp8,0,0.02940800040960312
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,128,0,1,float16,float16,0,0.03143999973932902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,128,0,1,float16,fp8,0,0.032272001107533775
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,128,0,1,fp8,fp8,0,0.029637334247430164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,128,0,1,float16,float16,0,0.03303466737270355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,128,0,1,float16,fp8,0,0.0315733328461647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,128,0,1,fp8,fp8,0,0.02940266579389572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,128,0,1,float16,float16,0,0.02941333254178365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,128,0,1,float16,fp8,0,0.02938666691382726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,128,0,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,128,0,1,float16,float16,0,0.027653334041436512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,128,0,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,128,0,1,fp8,fp8,0,0.025301332275072735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,128,0,1,float16,float16,0,0.02775466690460841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,128,0,1,float16,fp8,0,0.02937600016593933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,128,0,1,fp8,fp8,0,0.02553066611289978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,128,0,1,float16,float16,0,0.029071999092896778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,128,0,1,float16,fp8,0,0.029690665503342945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,128,0,1,fp8,fp8,0,0.027509334186712902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,128,0,1,float16,float16,0,0.026762666801611584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,128,0,1,float16,fp8,0,0.025360000630219776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,128,0,1,fp8,fp8,0,0.023472001155217487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,128,0,1,float16,float16,0,0.02571733295917511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,128,0,1,float16,fp8,0,0.025493333737055462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,128,0,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,128,0,1,float16,float16,0,0.025301332275072735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,128,0,1,float16,fp8,0,0.025445332129796345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,128,0,1,fp8,fp8,0,0.0235359991590182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,128,0,1,float16,float16,0,0.024853333830833435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,128,0,1,float16,fp8,0,0.025306666890780132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,128,0,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,128,0,1,float16,float16,0,0.35873599847157794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,128,0,1,float16,fp8,0,0.3574133316675822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,128,0,1,fp8,fp8,0,0.35791468620300293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,128,0,1,float16,float16,0,0.368064006169637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,128,0,1,float16,fp8,0,0.36576000849405926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,128,0,1,fp8,fp8,0,0.3689546585083008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,128,0,1,float16,float16,0,0.37857600053151447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,128,0,1,float16,fp8,0,0.37430401643117267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,128,0,1,fp8,fp8,0,0.3760266701380412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,128,0,1,float16,float16,0,0.2274293303489685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,128,0,1,float16,fp8,0,0.2199359933535258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,128,0,1,fp8,fp8,0,0.2227840026219686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,128,0,1,float16,float16,0,0.18818666537602743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,128,0,1,float16,fp8,0,0.1880693236986796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,128,0,1,fp8,fp8,0,0.18625599145889282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,128,0,1,float16,float16,0,0.19359999895095825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,128,0,1,float16,fp8,0,0.19362133741378784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,128,0,1,fp8,fp8,0,0.19542932510375977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,128,0,1,float16,float16,0,0.19769599040349325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,128,0,1,float16,fp8,0,0.19851199785868326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,128,0,1,fp8,fp8,0,0.196943998336792
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,128,0,1,float16,float16,0,0.1252959966659546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,128,0,1,float16,fp8,0,0.12211199601491292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,128,0,1,fp8,fp8,0,0.12316800157229106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,128,0,1,float16,float16,0,0.10556800166765849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,128,0,1,float16,fp8,0,0.10393599669138591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,128,0,1,float16,float16,0,0.10753066341082256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,128,0,1,fp8,fp8,0,0.09964799880981445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,128,0,1,float16,fp8,0,0.10745066404342651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,128,0,1,fp8,fp8,0,0.10531199971834819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,128,0,1,float16,float16,0,0.10981333255767822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,128,0,1,float16,fp8,0,0.11088533202807109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,128,0,1,fp8,fp8,0,0.1093280017375946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,128,0,1,float16,float16,0,0.0701279987891515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,128,0,1,float16,fp8,0,0.06941333413124084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,128,0,1,float16,float16,0,0.060559997955958046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,128,0,1,fp8,fp8,0,0.05428266525268555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,128,0,1,float16,fp8,0,0.059845333298047386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,128,0,1,fp8,fp8,0,0.07150933146476746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,128,0,1,float16,float16,0,0.06027733286221822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,128,0,1,float16,fp8,0,0.06122133135795593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,128,0,1,fp8,fp8,0,0.05416533350944519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,128,0,1,float16,float16,0,0.061792001128196716
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,128,0,1,float16,fp8,0,0.060559997955958046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,128,0,1,float16,float16,0,0.043663998444875084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,128,0,1,fp8,fp8,0,0.05817066629727682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,128,0,1,float16,fp8,0,0.04443199932575226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,128,0,1,fp8,fp8,0,0.04181333382924398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,128,0,1,float16,float16,0,0.041877334316571556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,128,0,1,float16,fp8,0,0.04168533285458883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,128,0,1,fp8,fp8,0,0.03875733415285746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,128,0,1,float16,fp8,0,0.042394667863845825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,128,0,1,float16,float16,0,0.04196799794832865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,128,0,1,fp8,fp8,0,0.037920000652472176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,128,0,1,float16,float16,0,0.04338666796684265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,128,0,1,float16,fp8,0,0.043151999513308205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,128,0,1,float16,float16,0,0.02921066681543986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,128,0,1,fp8,fp8,0,0.037903999288876854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,128,0,1,float16,fp8,0,0.03143466760714849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,128,0,1,fp8,fp8,0,0.029215998947620392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,128,0,1,float16,float16,0,0.028736000259717304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,128,0,1,float16,fp8,0,0.029359998802344005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,128,0,1,fp8,fp8,0,0.02693866689999898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,128,0,1,float16,float16,0,0.029487999776999157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,128,0,1,float16,fp8,0,0.029605334003766377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,128,0,1,fp8,fp8,0,0.027119999130566914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,128,0,1,float16,float16,0,0.02903999884923299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,128,0,1,float16,fp8,0,0.029391999046007793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,128,0,1,fp8,fp8,0,0.027424000203609467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,128,0,1,float16,float16,0,0.0272533322374026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,128,0,1,float16,fp8,0,0.02555199960867564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,128,0,1,fp8,fp8,0,0.025605333348115284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,128,0,1,float16,float16,0,0.02526933451493581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,128,0,1,float16,fp8,0,0.025536000728607178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,128,0,1,fp8,fp8,0,0.02345066765944163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,128,0,1,float16,float16,0,0.025536000728607178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,128,0,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,128,0,1,fp8,fp8,0,0.023887999355793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,128,0,1,float16,float16,0,0.02569066733121872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,128,0,1,float16,fp8,0,0.02521066615978877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,128,0,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,128,0,1,float16,float16,0,0.02349333216746648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,128,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,128,0,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,128,0,1,float16,float16,0,0.02313599983851115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,128,0,1,float16,fp8,0,0.023285334308942158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,128,0,1,fp8,fp8,0,0.02004266654451688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,128,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,128,0,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,128,0,1,fp8,fp8,0,0.019968000551064808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,128,0,1,float16,float16,0,0.023306667804718018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,128,0,1,float16,fp8,0,0.02348266790310542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,128,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,128,0,1,float16,float16,0,0.022991999983787537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,128,0,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,128,0,1,fp8,fp8,0,0.021189334491888683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,128,0,1,float16,float16,0,0.023498666783173878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,128,0,1,float16,fp8,0,0.023002666731675465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,128,0,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,128,0,1,float16,float16,0,0.02143466720978419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,128,0,1,float16,fp8,0,0.02311466634273529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,128,0,1,fp8,fp8,0,0.01940800001223882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,128,0,1,float16,float16,0,0.02179199953873952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,128,0,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,128,0,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,128,0,1,float16,float16,0,0.20248534282048544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,128,0,1,float16,fp8,0,0.2017973264058431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,128,0,1,fp8,fp8,0,0.1993280053138733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,128,0,1,float16,fp8,0,0.20748267571131387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,128,0,1,float16,float16,0,0.2084266742070516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,128,0,1,fp8,fp8,0,0.20689600706100464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,128,0,1,float16,float16,0,0.21153066555658975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,128,0,1,float16,fp8,0,0.20816532770792642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,128,0,1,fp8,fp8,0,0.21106133858362833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,128,0,1,float16,float16,0,0.12820800145467123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,128,0,1,float16,fp8,0,0.12379200259844463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,128,0,1,fp8,fp8,0,0.1281760036945343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,128,0,1,float16,float16,0,0.10874666770299275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,128,0,1,fp8,fp8,0,0.10406399766604106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,128,0,1,float16,fp8,0,0.10980799794197083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,128,0,1,float16,float16,0,0.1116480032602946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,128,0,1,float16,fp8,0,0.11162666479746501
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,128,0,1,fp8,fp8,0,0.11197333534558614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,128,0,1,float16,float16,0,0.1143946647644043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,128,0,1,float16,fp8,0,0.113237331310908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,128,0,1,fp8,fp8,0,0.11526399850845337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,128,0,1,float16,float16,0,0.07055466870466869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,128,0,1,float16,fp8,0,0.07075199981530507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,128,0,1,fp8,fp8,0,0.07484800120194753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,128,0,1,float16,float16,0,0.06214933097362518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,128,0,1,float16,fp8,0,0.0641546646753947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,128,0,1,fp8,fp8,0,0.058149332801500954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,128,0,1,float16,float16,0,0.06274133423964183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,128,0,1,float16,fp8,0,0.06418133278687795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,128,0,1,fp8,fp8,0,0.05997333427270254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,128,0,1,float16,float16,0,0.06429333488146464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,128,0,1,float16,fp8,0,0.0649599979321162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,128,0,1,fp8,fp8,0,0.06125866870085398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,128,0,1,float16,float16,0,0.04159999887148539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,128,0,1,float16,fp8,0,0.04177600145339966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,128,0,1,fp8,fp8,0,0.03956266740957896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,128,0,1,float16,fp8,0,0.03980266551176707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,128,0,1,fp8,fp8,0,0.03583466758330663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,128,0,1,float16,float16,0,0.04041066765785217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,128,0,1,float16,float16,0,0.039477333426475525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,128,0,1,float16,fp8,0,0.03982933362325033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,128,0,1,fp8,fp8,0,0.03774933268626531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,128,0,1,float16,fp8,0,0.03975466638803482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,128,0,1,float16,float16,0,0.03988266736268997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,128,0,1,fp8,fp8,0,0.037685332198937736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,128,0,1,float16,float16,0,0.03148266673088074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,128,0,1,float16,fp8,0,0.031541332602500916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,128,0,1,fp8,fp8,0,0.029663999875386555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,128,0,1,float16,float16,0,0.029285334050655365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,128,0,1,float16,fp8,0,0.029909332593282063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,128,0,1,fp8,fp8,0,0.028181334336598713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,128,0,1,float16,fp8,0,0.02926933268706004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,128,0,1,float16,float16,0,0.029711998999118805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,128,0,1,fp8,fp8,0,0.027610667049884796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,128,0,1,float16,float16,0,0.029818666477998097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,128,0,1,float16,fp8,0,0.029253333806991577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,128,0,1,fp8,fp8,0,0.02756800005833308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,128,0,1,float16,float16,0,0.021664001047611237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,128,0,1,float16,fp8,0,0.023226665953795116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,128,0,1,fp8,fp8,0,0.021370666722456615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,128,0,1,float16,float16,0,0.02145066608985265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,128,0,1,fp8,fp8,0,0.021365332106749218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,128,0,1,float16,fp8,0,0.021717332303524017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,128,0,1,float16,float16,0,0.021482666333516438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,128,0,1,float16,fp8,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,128,0,1,fp8,fp8,0,0.021477334201335907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,128,0,1,float16,float16,0,0.02330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,128,0,1,float16,fp8,0,0.023242667317390442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,128,0,1,float16,float16,0,0.019007999449968338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,128,0,1,fp8,fp8,0,0.022218666970729828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,128,0,1,float16,fp8,0,0.019519999623298645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,128,0,1,fp8,fp8,0,0.017946666727463405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,128,0,1,float16,float16,0,0.019727999965349834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,128,0,1,float16,fp8,0,0.018917333334684372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,128,0,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,128,0,1,float16,float16,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,128,0,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,128,0,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,128,0,1,float16,float16,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,128,0,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,128,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,128,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,128,0,1,float16,fp8,0,0.018842666099468868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,128,0,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,128,0,1,float16,float16,0,0.017445333302021027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,128,0,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,128,0,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,128,0,1,float16,float16,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,128,0,1,float16,fp8,0,0.018816000471512478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,128,0,1,float16,float16,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,128,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,128,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,128,0,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,128,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,128,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,128,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,128,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,128,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,128,0,1,float16,float16,0,0.01897066707412402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,128,0,1,float16,fp8,0,0.019424000134070713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,128,0,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,128,0,1,float16,float16,0,0.1341600020726522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,128,0,1,fp8,fp8,0,0.1280586620171865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,128,0,1,float16,fp8,0,0.13546666502952576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,128,0,1,float16,float16,0,0.13709867000579834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,128,0,1,float16,fp8,0,0.13645866513252258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,128,0,1,fp8,fp8,0,0.1360426644484202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,128,0,1,float16,float16,0,0.13749866684277853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,128,0,1,float16,fp8,0,0.13645333051681519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,128,0,1,fp8,fp8,0,0.1384266714255015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,128,0,1,float16,float16,0,0.08467732866605122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,128,0,1,float16,fp8,0,0.08187733093897502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,128,0,1,fp8,fp8,0,0.08516266942024231
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,128,0,1,float16,float16,0,0.07516799867153168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,128,0,1,float16,fp8,0,0.07470933099587758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,128,0,1,fp8,fp8,0,0.0689279983441035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,128,0,1,float16,float16,0,0.07474666833877563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,128,0,1,float16,fp8,0,0.0747573326031367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,128,0,1,fp8,fp8,0,0.07066133121649425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,128,0,1,float16,float16,0,0.07639466722806294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,128,0,1,float16,fp8,0,0.07566933333873749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,128,0,1,fp8,fp8,0,0.07392533123493195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,128,0,1,float16,float16,0,0.04784533381462097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,128,0,1,float16,fp8,0,0.04872000217437744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,128,0,1,fp8,fp8,0,0.048207998275756836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,128,0,1,float16,float16,0,0.04576533536116282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,128,0,1,float16,fp8,0,0.046762665112813316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,128,0,1,fp8,fp8,0,0.04385599990685781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,128,0,1,float16,float16,0,0.046800002455711365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,128,0,1,float16,fp8,0,0.04670399924119314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,128,0,1,fp8,fp8,0,0.043968002001444496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,128,0,1,float16,float16,0,0.04660800099372864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,128,0,1,float16,fp8,0,0.04580800235271454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,128,0,1,fp8,fp8,0,0.04385599990685781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,128,0,1,float16,float16,0,0.031514666974544525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,128,0,1,float16,fp8,0,0.03143466760714849
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,128,0,1,fp8,fp8,0,0.031386665999889374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,128,0,1,float16,float16,0,0.03129599988460541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,128,0,1,float16,fp8,0,0.03148799886306127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,128,0,1,fp8,fp8,0,0.02924799919128418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,128,0,1,float16,float16,0,0.03032533327738444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,128,0,1,fp8,fp8,0,0.029989334444204967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,128,0,1,float16,float16,0,0.03152533372243246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,128,0,1,float16,fp8,0,0.031162666777769726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,128,0,1,float16,fp8,0,0.03166933357715607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,128,0,1,float16,float16,0,0.025616000096003216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,128,0,1,fp8,fp8,0,0.029701332251230877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,128,0,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,128,0,1,fp8,fp8,0,0.025792000194390614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,128,0,1,float16,float16,0,0.025248001019159954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,128,0,1,float16,fp8,0,0.02313599983851115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,128,0,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,128,0,1,float16,float16,0,0.025472000241279602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,128,0,1,float16,fp8,0,0.025114665428797405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,128,0,1,fp8,fp8,0,0.022976001103719074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,128,0,1,float16,float16,0,0.025114665428797405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,128,0,1,float16,fp8,0,0.025381334125995636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,128,0,1,fp8,fp8,0,0.02319466571013133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,128,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,128,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,128,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,128,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,128,0,1,float16,float16,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,128,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,128,0,1,fp8,fp8,0,0.018863999595244724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,128,0,1,float16,fp8,0,0.019093333433071773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,128,0,1,fp8,fp8,0,0.01752000053723653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,128,0,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,128,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,128,0,1,float16,float16,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,128,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,128,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,128,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,128,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,128,0,1,float16,fp8,0,0.017642666896184284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,128,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,128,0,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,128,0,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,128,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,128,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,128,0,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,128,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,128,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,128,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,128,0,1,fp8,fp8,0,0.016352000335852306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,128,0,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,128,0,1,float16,float16,0,0.017349333812793095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,128,0,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,128,0,1,float16,float16,0,0.10069333513577779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,128,0,1,float16,fp8,0,0.10130666693051656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,128,0,1,fp8,fp8,0,0.09468799829483032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,128,0,1,float16,float16,0,0.09969600041707356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,128,0,1,float16,fp8,0,0.10134933392206828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,128,0,1,fp8,fp8,0,0.09521067142486572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,128,0,1,float16,float16,0,0.1016426682472229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,128,0,1,float16,fp8,0,0.10095466176668803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,128,0,1,fp8,fp8,0,0.09713600079218547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,128,0,1,float16,float16,0,0.06028800209363302
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,128,0,1,fp8,fp8,0,0.05832533538341522
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,128,0,1,float16,fp8,0,0.060229331254959106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,128,0,1,float16,float16,0,0.05805333455403646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,128,0,1,float16,fp8,0,0.05820799867312113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,128,0,1,fp8,fp8,0,0.056346664826075234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,128,0,1,float16,float16,0,0.05872533222039541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,128,0,1,float16,fp8,0,0.0582826683918635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,128,0,1,fp8,fp8,0,0.05598400036493937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,128,0,1,float16,float16,0,0.05902933577696482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,128,0,1,float16,fp8,0,0.05831466615200043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,128,0,1,fp8,fp8,0,0.05641599992911021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,128,0,1,float16,fp8,0,0.03797333439191183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,128,0,1,float16,float16,0,0.037871999045213066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,128,0,1,fp8,fp8,0,0.03775466730197271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,128,0,1,float16,float16,0,0.03761066744724909
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,128,0,1,float16,fp8,0,0.037434667348861694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,128,0,1,fp8,fp8,0,0.03558400024970373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,128,0,1,float16,float16,0,0.03737066686153412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,128,0,1,float16,fp8,0,0.037834666669368744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,128,0,1,fp8,fp8,0,0.03579200059175491
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,128,0,1,float16,float16,0,0.037658666570981346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,128,0,1,float16,fp8,0,0.03742400060097376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,128,0,1,fp8,fp8,0,0.03573333223660787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,128,0,1,float16,float16,0,0.028160000840822857
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,128,0,1,float16,fp8,0,0.02736533433198929
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,128,0,1,float16,float16,0,0.025349333882331848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,128,0,1,fp8,fp8,0,0.026250667870044708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,128,0,1,fp8,fp8,0,0.025621332228183746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,128,0,1,float16,fp8,0,0.02610666553179423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,128,0,1,float16,float16,0,0.027104000250498455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,128,0,1,float16,fp8,0,0.0271519993742307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,128,0,1,fp8,fp8,0,0.02701866626739502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,128,0,1,float16,fp8,0,0.02754666656255722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,128,0,1,float16,float16,0,0.02593066543340683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,128,0,1,fp8,fp8,0,0.02743999908367793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,128,0,1,float16,float16,0,0.02103466788927714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,128,0,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,128,0,1,fp8,fp8,0,0.021274665991465252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,128,0,1,float16,float16,0,0.019808000574509304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,128,0,1,float16,fp8,0,0.01998399943113327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,128,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,128,0,1,float16,float16,0,0.020970667401949566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,128,0,1,float16,fp8,0,0.020501332978407543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,128,0,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,128,0,1,float16,float16,0,0.0195573332409064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,128,0,1,fp8,fp8,0,0.02109333376089732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,128,0,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,128,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,128,0,1,fp8,fp8,0,0.017765333255132038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,128,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,128,0,1,float16,float16,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,128,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,128,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,128,0,1,float16,float16,0,0.017498667041460674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,128,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,128,0,1,float16,fp8,0,0.01754666616519292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,128,0,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,128,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,128,0,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,128,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,128,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,128,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,128,0,1,float16,float16,0,0.015930666277805965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,128,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,128,0,1,fp8,fp8,0,0.016517333686351776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,128,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,128,0,1,fp8,fp8,0,0.016021333634853363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,128,0,1,fp8,fp8,0,0.015626666446526844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,128,0,1,float16,float16,0,0.015962666521469753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,128,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,128,0,1,float16,fp8,0,0.01674666628241539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,128,0,1,float16,float16,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,128,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,128,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,128,0,1,fp8,fp8,0,0.015674666812022526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,128,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,128,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,128,0,1,fp8,fp8,0,0.01609066625436147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,128,0,1,float16,float16,0,0.0841919978459676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,128,0,1,float16,fp8,0,0.08524800340334575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,128,0,1,fp8,fp8,0,0.08106666803359985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,128,0,1,float16,float16,0,0.08507733543713887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,128,0,1,float16,fp8,0,0.08482666810353597
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,128,0,1,fp8,fp8,0,0.08208000163237254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,128,0,1,float16,float16,0,0.08559999863306682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,128,0,1,float16,fp8,0,0.0844533344109853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,128,0,1,fp8,fp8,0,0.0825973351796468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,128,0,1,float16,float16,0,0.05203733344872793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,128,0,1,fp8,fp8,0,0.052058666944503784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,128,0,1,float16,fp8,0,0.05223466455936432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,128,0,1,float16,float16,0,0.050981332858403526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,128,0,1,float16,fp8,0,0.04974400003751119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,128,0,1,fp8,fp8,0,0.05012799799442291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,128,0,1,float16,float16,0,0.05176533261934916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,128,0,1,float16,fp8,0,0.049925332268079124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,128,0,1,fp8,fp8,0,0.049653331438700356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,128,0,1,float16,float16,0,0.050010666251182556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,128,0,1,float16,fp8,0,0.049813335140546165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,128,0,1,fp8,fp8,0,0.050000001986821495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,128,0,1,float16,float16,0,0.03364266703526179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,128,0,1,float16,fp8,0,0.03333866596221924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,128,0,1,fp8,fp8,0,0.03425066669782003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,128,0,1,float16,float16,0,0.03338133295377096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,128,0,1,float16,fp8,0,0.03356799980004629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,128,0,1,fp8,fp8,0,0.03366933266321818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,128,0,1,float16,float16,0,0.03339199970165888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,128,0,1,float16,fp8,0,0.03383466601371765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,128,0,1,fp8,fp8,0,0.03349866718053818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,128,0,1,float16,float16,0,0.03395200024048487
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,128,0,1,float16,fp8,0,0.03478399912516276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,128,0,1,float16,float16,0,0.023365333676338196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,128,0,1,fp8,fp8,0,0.033359999457995095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,128,0,1,float16,fp8,0,0.02346133440732956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,128,0,1,float16,float16,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,128,0,1,float16,fp8,0,0.02309333284695943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,128,0,1,fp8,fp8,0,0.02348800003528595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,128,0,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,128,0,1,float16,float16,0,0.02346666653951009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,128,0,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,128,0,1,fp8,fp8,0,0.022986667851607006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,128,0,1,float16,float16,0,0.02207999924818675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,128,0,1,float16,fp8,0,0.023018665611743927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,128,0,1,fp8,fp8,0,0.02347733328739802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,128,0,1,float16,float16,0,0.01921066641807556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,128,0,1,float16,fp8,0,0.019589333484570186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,128,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,128,0,1,float16,float16,0,0.019333332777023315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,128,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,128,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,128,0,1,float16,float16,0,0.019333332777023315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,128,0,1,float16,fp8,0,0.020197333147128422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,128,0,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,128,0,1,float16,float16,0,0.018810667097568512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,128,0,1,float16,fp8,0,0.020367999871571858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,128,0,1,fp8,fp8,0,0.020576000213623047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,128,0,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,128,0,1,float16,fp8,0,0.016714667280515034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,128,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,128,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,128,0,1,float16,fp8,0,0.01788266624013583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,128,0,1,fp8,fp8,0,0.017808000246683758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,128,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,128,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,128,0,1,fp8,fp8,0,0.017530667285124462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,128,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,128,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,128,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,128,0,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,128,0,1,fp8,fp8,0,0.017488000293572743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,128,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,128,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,128,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,128,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,128,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,128,0,1,fp8,fp8,0,0.015589332828919092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,128,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,128,0,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,128,0,1,float16,float16,0,0.016682667036851246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,128,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,128,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,128,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,128,0,1,float16,float16,0,0.017450666675964992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,128,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,128,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,128,0,1,fp8,fp8,0,0.01640533283352852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,128,0,1,float16,float16,0,0.07134399811426799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,128,0,1,float16,fp8,0,0.07223466535409291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,128,0,1,fp8,fp8,0,0.06833066542943318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,128,0,1,float16,float16,0,0.070933332045873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,128,0,1,float16,fp8,0,0.07130133112271626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,128,0,1,fp8,fp8,0,0.06854400038719177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,128,0,1,float16,float16,0,0.07286400099595387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,128,0,1,float16,fp8,0,0.0721013347307841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,128,0,1,fp8,fp8,0,0.0701333334048589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,0,0.04412800073623657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,0,0.04390400151411692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,128,0,1,fp8,fp8,0,0.04159999887148539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,128,0,1,float16,float16,0,0.04346133271853129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,128,0,1,float16,fp8,0,0.04407466451327006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,128,0,1,fp8,fp8,0,0.04178666571776072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,128,0,1,float16,float16,0,0.04311466713746389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,128,0,1,float16,fp8,0,0.04365866879622141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,128,0,1,fp8,fp8,0,0.04196799794832865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,128,0,1,float16,float16,0,0.043322667479515076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,128,0,1,float16,fp8,0,0.04391466577847799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,128,0,1,fp8,fp8,0,0.04199466605981191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,0,0.029317334294319153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,128,0,1,fp8,fp8,0,0.02934933453798294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,128,0,1,float16,float16,0,0.02962133288383484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,128,0,1,float16,fp8,0,0.029626667499542236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,128,0,1,fp8,fp8,0,0.029733332494894665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,128,0,1,float16,float16,0,0.029829333225886028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,128,0,1,float16,fp8,0,0.02959999938805898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,128,0,1,fp8,fp8,0,0.029296000798543293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,128,0,1,float16,float16,0,0.02959999938805898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,128,0,1,float16,fp8,0,0.02941333254178365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,128,0,1,fp8,fp8,0,0.029333333174387615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,0,0.02292266736427943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,0,0.024298667907714844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,128,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,128,0,1,float16,float16,0,0.023226665953795116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,128,0,1,float16,fp8,0,0.025061334172884624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,128,0,1,float16,float16,0,0.02531733363866806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,128,0,1,float16,fp8,0,0.023311999936898548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,128,0,1,fp8,fp8,0,0.021898667017618816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,128,0,1,fp8,fp8,0,0.023082666099071503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,128,0,1,float16,float16,0,0.024112001061439514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,128,0,1,float16,fp8,0,0.0249493345618248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,128,0,1,fp8,fp8,0,0.02329600105683009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,0,0.02141333371400833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,128,0,1,fp8,fp8,0,0.019920000185569126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,128,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,128,0,1,float16,float16,0,0.01950399950146675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,128,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,128,0,1,float16,float16,0,0.02128000060717265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,128,0,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,128,0,1,float16,fp8,0,0.021573332448800404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,128,0,1,float16,float16,0,0.019733333339293797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,128,0,1,float16,fp8,0,0.02165866643190384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,128,0,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,0,0.01563199982047081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,128,0,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,128,0,1,float16,float16,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,128,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,128,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,128,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,128,0,1,fp8,fp8,0,0.0161920003592968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,128,0,1,float16,float16,0,0.015397333850463232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,128,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,128,0,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,128,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,128,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,128,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,128,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,128,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,128,0,1,float16,fp8,0,0.01600533351302147
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,128,0,1,fp8,fp8,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,128,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,128,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,128,0,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,128,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,128,0,1,float16,fp8,0,0.015509333461523056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,128,0,1,fp8,fp8,0,0.016016000260909397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,128,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,128,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,128,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,128,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,128,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,128,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,128,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,128,0,1,fp8,fp8,0,2.826709429423014
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,128,0,1,float16,float16,0,3.777141253153483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,128,0,1,float16,fp8,0,5.1938826243082685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,128,0,1,fp8,fp8,0,2.8908958435058594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,128,0,1,float16,float16,0,5.176938692728679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,128,0,1,float16,fp8,0,5.052394549051921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,128,0,1,float16,float16,0,5.626656214396159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,128,0,1,float16,float16,0,2.3816213607788086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,128,0,1,fp8,fp8,0,3.0479148228963218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,128,0,1,float16,fp8,0,4.668191909790039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,128,0,1,float16,fp8,0,2.4045546849568686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,128,0,1,fp8,fp8,0,1.5392746925354004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,128,0,1,float16,float16,0,1.7423253059387207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,128,0,1,float16,fp8,0,1.721519947052002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,128,0,1,fp8,fp8,0,1.4929547309875488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,128,0,1,float16,float16,0,1.7346453666687012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,128,0,1,float16,fp8,0,2.514965375264486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,128,0,1,fp8,fp8,0,1.6497440338134766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,128,0,1,float16,float16,0,2.191509405771891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,128,0,1,float16,float16,0,0.9748906294504801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,128,0,1,float16,fp8,0,2.2347413698832193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,128,0,1,float16,fp8,0,1.0859039624532063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,128,0,1,fp8,fp8,0,1.5124266942342122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,128,0,1,fp8,fp8,0,0.8548479874928793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,128,0,1,fp8,fp8,0,0.8346292972564697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,128,0,1,float16,float16,0,0.996117353439331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,128,0,1,float16,fp8,0,1.0595200061798096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,128,0,1,float16,float16,0,0.9590346813201904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,128,0,1,fp8,fp8,0,0.8338346481323242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,128,0,1,float16,fp8,0,0.9684106508890787
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,128,0,1,float16,float16,0,0.9613706270853678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,128,0,1,float16,fp8,0,1.0415039857228596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,128,0,1,float16,float16,0,0.648416002591451
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,128,0,1,float16,fp8,0,0.5793706576029459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,128,0,1,fp8,fp8,0,0.8390506903330485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,128,0,1,fp8,fp8,0,0.5405919949213663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,128,0,1,float16,float16,0,0.5691359837849935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,128,0,1,float16,fp8,0,0.5623573462168375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,128,0,1,fp8,fp8,0,0.49608532587687176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,128,0,1,float16,float16,0,0.5715839862823486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,128,0,1,fp8,fp8,0,0.501360019048055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,128,0,1,float16,fp8,0,0.5646560192108154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,128,0,1,fp8,fp8,0,0.504752000172933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,128,0,1,float16,float16,0,0.5757386684417725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,128,0,1,float16,fp8,0,0.5749386548995972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,128,0,1,fp8,fp8,0,1.7138932545979817
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,128,0,1,float16,float16,0,1.9523146947224934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,128,0,1,float16,fp8,0,2.621994654337565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,128,0,1,float16,float16,0,2.117232004801432
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,128,0,1,fp8,fp8,0,1.8106346130371094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,128,0,1,float16,fp8,0,2.83077335357666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,128,0,1,float16,float16,0,1.0881280104319255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,128,0,1,float16,fp8,0,2.237818717956543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,128,0,1,fp8,fp8,0,1.7421013514200847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,128,0,1,float16,float16,0,2.8383359909057617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,128,0,1,float16,fp8,0,1.1657226880391438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,128,0,1,float16,float16,0,1.0518293380737305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,128,0,1,fp8,fp8,0,0.9587039947509766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,128,0,1,float16,fp8,0,1.1315733591715496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,128,0,1,fp8,fp8,0,0.923642635345459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,128,0,1,float16,fp8,0,1.0795893669128418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,128,0,1,float16,float16,0,1.3171626726786296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,128,0,1,fp8,fp8,0,0.9291840394337972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,128,0,1,float16,float16,0,1.0718239943186443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,128,0,1,float16,float16,0,0.6202346483866373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,128,0,1,float16,fp8,0,1.3231946627298992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,128,0,1,fp8,fp8,0,0.9345119794209799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,128,0,1,float16,fp8,0,0.624506672223409
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,128,0,1,fp8,fp8,0,0.5448853174845377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,128,0,1,float16,fp8,0,0.5955466826756796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,128,0,1,float16,float16,0,0.6022986570994059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,128,0,1,fp8,fp8,0,0.5289333264032999
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,128,0,1,float16,float16,0,0.6009706656138102
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,128,0,1,fp8,fp8,0,0.5311093330383301
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,128,0,1,float16,fp8,0,0.6009386777877808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,128,0,1,float16,float16,0,0.6104906797409058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,128,0,1,float16,float16,0,0.37442131837209064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,128,0,1,float16,fp8,0,0.6099253495534261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,128,0,1,fp8,fp8,0,0.538101315498352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,128,0,1,float16,fp8,0,0.37723731994628906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,128,0,1,fp8,fp8,0,0.33745066324869794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,128,0,1,float16,float16,0,0.3640213410059611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,128,0,1,float16,fp8,0,0.3667413393656413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,128,0,1,fp8,fp8,0,0.32635732491811115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,128,0,1,float16,float16,0,0.3638186852137248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,128,0,1,float16,fp8,0,0.36686933040618896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,128,0,1,fp8,fp8,0,0.3311093250910441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,128,0,1,float16,float16,0,0.37328533331553143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,128,0,1,float16,fp8,0,0.3675040006637573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,128,0,1,fp8,fp8,0,0.3320479989051819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,128,0,1,float16,float16,0,1.61025603612264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,128,0,1,float16,fp8,0,1.4487679799397786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,128,0,1,fp8,fp8,0,1.258138656616211
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,128,0,1,float16,float16,0,1.6161173184712727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,128,0,1,fp8,fp8,0,1.2694453398386638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,128,0,1,float16,float16,0,1.6784639358520508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,128,0,1,float16,fp8,0,2.001728057861328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,128,0,1,float16,fp8,0,1.508863925933838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,128,0,1,float16,float16,0,0.8317546844482422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,128,0,1,fp8,fp8,0,1.283893346786499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,128,0,1,float16,fp8,0,0.8392693201700846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,128,0,1,fp8,fp8,0,1.0490506490071614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,128,0,1,float16,float16,0,0.8047040303548177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,128,0,1,float16,fp8,0,0.7835360368092855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,128,0,1,fp8,fp8,0,0.8502399921417236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,128,0,1,float16,float16,0,0.7893386681874593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,128,0,1,float16,fp8,0,0.7884106636047363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,128,0,1,fp8,fp8,0,0.7228426933288574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,128,0,1,float16,float16,0,0.806272029876709
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,128,0,1,float16,fp8,0,0.7974826494852701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,128,0,1,float16,float16,0,0.47618667284647626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,128,0,1,fp8,fp8,0,0.6948266824086508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,128,0,1,float16,fp8,0,0.5003840128580729
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,128,0,1,fp8,fp8,0,0.4173119862874349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,128,0,1,float16,float16,0,0.44914666811625165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,128,0,1,float16,fp8,0,0.44741864999135333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,128,0,1,fp8,fp8,0,0.40142401059468585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,128,0,1,float16,float16,0,0.4544106721878052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,128,0,1,float16,fp8,0,0.45743465423583984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,128,0,1,fp8,fp8,0,0.4005546569824219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,128,0,1,float16,float16,0,0.4622453451156616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,128,0,1,float16,fp8,0,0.4611999988555908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,128,0,1,float16,float16,0,0.2898719906806946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,128,0,1,fp8,fp8,0,0.4103786547978719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,128,0,1,float16,fp8,0,0.2964959939320882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,128,0,1,fp8,fp8,0,0.26493332783381146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,128,0,1,float16,float16,0,0.28518933057785034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,128,0,1,float16,fp8,0,0.2839146653811137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,128,0,1,fp8,fp8,0,0.2572000026702881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,128,0,1,float16,float16,0,0.2829119960467021
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,128,0,1,fp8,fp8,0,0.2593013246854146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,128,0,1,float16,fp8,0,0.2860106627146403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,128,0,1,float16,float16,0,0.28938666979471844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,128,0,1,float16,fp8,0,0.28562132517496747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,128,0,1,fp8,fp8,0,0.2600906689961751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,128,0,1,float16,float16,0,1.8953866958618164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,128,0,1,float16,fp8,0,1.933322588602702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,128,0,1,fp8,fp8,0,1.655509312947591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,128,0,1,float16,float16,0,1.9010666211446126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,128,0,1,fp8,fp8,0,1.6696747144063313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,128,0,1,float16,fp8,0,2.778202692667643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,128,0,1,float16,float16,0,2.220383961995443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,128,0,1,float16,float16,0,1.039669354756673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,128,0,1,float16,fp8,0,2.33296537399292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,128,0,1,fp8,fp8,0,1.6893760363260906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,128,0,1,float16,fp8,0,1.0463893413543701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,128,0,1,fp8,fp8,0,0.918170690536499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,128,0,1,float16,float16,0,1.0092213153839111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,128,0,1,float16,fp8,0,1.1762879689534504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,128,0,1,fp8,fp8,0,0.870581309000651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,128,0,1,float16,float16,0,1.2113386789957683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,128,0,1,float16,fp8,0,1.0070186456044514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,128,0,1,fp8,fp8,0,0.8789599736531576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,128,0,1,float16,float16,0,1.0211893717447917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,128,0,1,float16,fp8,0,1.0308533509572346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,128,0,1,float16,float16,0,0.5699199835459391
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,128,0,1,float16,fp8,0,0.6594719886779785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,128,0,1,fp8,fp8,0,0.5146400133768717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,128,0,1,fp8,fp8,0,1.0321919918060303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,128,0,1,float16,float16,0,0.5510986646016439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,128,0,1,float16,fp8,0,0.6049226522445679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,128,0,1,fp8,fp8,0,0.4848586718241374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,128,0,1,float16,float16,0,0.5540853341420492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,128,0,1,float16,fp8,0,0.5550133387247721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,128,0,1,fp8,fp8,0,0.4856693347295125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,128,0,1,float16,float16,0,0.5627466837565104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,128,0,1,float16,fp8,0,0.5638720194498698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,128,0,1,float16,float16,0,0.3345173199971517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,128,0,1,float16,fp8,0,0.3407680193583171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,128,0,1,fp8,fp8,0,0.49330135186513263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,128,0,1,fp8,fp8,0,0.3044106761614482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,128,0,1,float16,float16,0,0.31658132870992023
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,128,0,1,fp8,fp8,0,0.28490666548411053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,128,0,1,float16,fp8,0,0.31992000341415405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,128,0,1,float16,float16,0,0.3225173354148865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,128,0,1,float16,fp8,0,0.32197866837183636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,128,0,1,fp8,fp8,0,0.2903680006663005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,128,0,1,float16,float16,0,0.32545600334803265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,128,0,1,float16,fp8,0,0.33000000317891437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,128,0,1,float16,float16,0,0.21362133820851645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,128,0,1,fp8,fp8,0,0.29707199335098267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,128,0,1,float16,fp8,0,0.21411732832590738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,128,0,1,fp8,fp8,0,0.19734400510787964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,128,0,1,float16,float16,0,0.2132693330446879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,128,0,1,float16,fp8,0,0.21131199598312378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,128,0,1,fp8,fp8,0,0.19012266397476196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,128,0,1,float16,float16,0,0.21042132377624512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,128,0,1,fp8,fp8,0,0.18996800978978476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,128,0,1,float16,fp8,0,0.2128159999847412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,128,0,1,float16,float16,0,0.2084160049756368
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,128,0,1,float16,fp8,0,0.21227733294169107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,128,0,1,fp8,fp8,0,0.19165867567062378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,128,0,1,float16,float16,0,1.1980053583780925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,128,0,1,float16,fp8,0,1.1775200366973877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,128,0,1,fp8,fp8,0,1.0357653299967449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,128,0,1,float16,float16,0,1.198362668355306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,128,0,1,fp8,fp8,0,1.0471466382344563
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,128,0,1,float16,fp8,0,1.2140106360117595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,128,0,1,float16,float16,0,1.2006773153940837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,128,0,1,float16,fp8,0,1.3748052914937336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,128,0,1,float16,float16,0,0.7803520361582438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,128,0,1,float16,fp8,0,0.7332959969838461
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,128,0,1,fp8,fp8,0,1.063370704650879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,128,0,1,fp8,fp8,0,0.7409546375274658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,128,0,1,float16,float16,0,0.6733493010203043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,128,0,1,fp8,fp8,0,0.5520586570103964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,128,0,1,float16,fp8,0,0.6936799685160319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,128,0,1,float16,float16,0,0.6364373366038004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,128,0,1,fp8,fp8,0,0.5598186651865641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,128,0,1,float16,fp8,0,0.6447360118230184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,128,0,1,float16,float16,0,0.6569439967473348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,128,0,1,float16,fp8,0,0.6452106634775797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,128,0,1,float16,float16,0,0.37452268600463867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,128,0,1,float16,fp8,0,0.3853919903437297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,128,0,1,fp8,fp8,0,0.5659679969151815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,128,0,1,fp8,fp8,0,0.3340746561686198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,128,0,1,float16,float16,0,0.35202133655548096
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,128,0,1,float16,fp8,0,0.35390400886535645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,128,0,1,fp8,fp8,0,0.31385066111882526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,128,0,1,float16,float16,0,0.35415466626485187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,128,0,1,float16,fp8,0,0.35865068435668945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,128,0,1,fp8,fp8,0,0.3200586636861165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,128,0,1,float16,float16,0,0.36044267813364667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,128,0,1,float16,fp8,0,0.3625439802805583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,128,0,1,float16,float16,0,0.22155199448267618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,128,0,1,fp8,fp8,0,0.32336000601450604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,128,0,1,float16,fp8,0,0.22299732764561972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,128,0,1,fp8,fp8,0,0.20270399252573648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,128,0,1,float16,float16,0,0.21158399184544882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,128,0,1,float16,fp8,0,0.2115466594696045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,128,0,1,fp8,fp8,0,0.18741865952809653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,128,0,1,float16,float16,0,0.21248533328374228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,128,0,1,fp8,fp8,0,0.19044800599416098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,128,0,1,float16,fp8,0,0.21305600802103677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,128,0,1,float16,float16,0,0.21280533075332642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,128,0,1,float16,fp8,0,0.21530133485794067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,128,0,1,float16,float16,0,0.1433013379573822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,128,0,1,float16,fp8,0,0.14296533664067587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,128,0,1,fp8,fp8,0,0.19674134254455566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,128,0,1,fp8,fp8,0,0.13316266735394797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,128,0,1,float16,float16,0,0.1402133305867513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,128,0,1,float16,fp8,0,0.14057599504788718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,128,0,1,fp8,fp8,0,0.13056533535321554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,128,0,1,float16,float16,0,0.13962133725484213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,128,0,1,float16,fp8,0,0.14114666978518167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,128,0,1,fp8,fp8,0,0.13011200229326883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,128,0,1,float16,float16,0,0.13953600327173868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,128,0,1,float16,fp8,0,0.14232533176740012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,128,0,1,fp8,fp8,0,0.130213330189387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,128,0,1,float16,fp8,0,1.1988746325174968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,128,0,1,float16,float16,0,1.2070773442586262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,128,0,1,fp8,fp8,0,1.0616479714711506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,128,0,1,fp8,fp8,0,1.0804693698883057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,128,0,1,float16,float16,0,1.2138079802195232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,128,0,1,float16,fp8,0,1.230458656946818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,128,0,1,fp8,fp8,0,1.098304033279419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,128,0,1,float16,float16,0,1.2551093101501465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,128,0,1,float16,fp8,0,1.2440373102823894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,128,0,1,fp8,fp8,0,0.6040586630503336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,128,0,1,float16,float16,0,0.6732532978057861
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,128,0,1,float16,fp8,0,0.7337493101755778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,128,0,1,float16,float16,0,0.6311946709950765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,128,0,1,float16,fp8,0,0.6724747021993002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,128,0,1,fp8,fp8,0,0.6039520104726156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,128,0,1,float16,float16,0,0.6353973150253296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,128,0,1,fp8,fp8,0,0.562549352645874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,128,0,1,float16,fp8,0,0.6372640132904053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,128,0,1,float16,float16,0,0.6424959897994995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,128,0,1,float16,float16,0,0.36610666910807294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,128,0,1,float16,fp8,0,0.3662240107854207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,128,0,1,float16,fp8,0,0.6491359869639078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,128,0,1,fp8,fp8,0,0.5736960172653198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,128,0,1,fp8,fp8,0,0.33486398061116535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,128,0,1,float16,float16,0,0.3396693468093872
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,128,0,1,float16,fp8,0,0.34249067306518555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,128,0,1,fp8,fp8,0,0.3036959966023763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,128,0,1,float16,float16,0,0.3449759880701701
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,128,0,1,float16,fp8,0,0.34407468636830646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,128,0,1,fp8,fp8,0,0.309168001015981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,128,0,1,float16,float16,0,0.35227731863657635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,128,0,1,float16,float16,0,0.20966400702794394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,128,0,1,float16,fp8,0,0.3538026809692383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,128,0,1,fp8,fp8,0,0.31464533011118573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,128,0,1,float16,fp8,0,0.2149333357810974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,128,0,1,fp8,fp8,0,0.19272534052530924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,128,0,1,float16,float16,0,0.19333332777023315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,128,0,1,fp8,fp8,0,0.17554134130477905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,128,0,1,float16,fp8,0,0.19189866383870444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,128,0,1,float16,float16,0,0.19453332821528116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,128,0,1,float16,fp8,0,0.19749865929285684
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,128,0,1,fp8,fp8,0,0.18200532595316568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,128,0,1,float16,float16,0,0.20091732343037924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,128,0,1,float16,float16,0,0.13121066490809122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,128,0,1,float16,fp8,0,0.1309653321901957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,128,0,1,float16,fp8,0,0.20123199621836343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,128,0,1,fp8,fp8,0,0.18643200397491455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,128,0,1,fp8,fp8,0,0.12081066767374675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,128,0,1,float16,float16,0,0.12626666824022928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,128,0,1,float16,fp8,0,0.12447999914487202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,128,0,1,fp8,fp8,0,0.11376532912254333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,128,0,1,float16,float16,0,0.12843199570973715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,128,0,1,fp8,fp8,0,0.11361066500345866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,128,0,1,float16,fp8,0,0.12960533301035562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,128,0,1,float16,float16,0,0.12619733810424805
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,128,0,1,float16,fp8,0,0.1285599966843923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,128,0,1,float16,float16,0,0.08481066425641377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,128,0,1,fp8,fp8,0,0.1155413289864858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,128,0,1,float16,fp8,0,0.085125337044398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,128,0,1,fp8,fp8,0,0.08221866687138875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,128,0,1,float16,float16,0,0.08317333459854126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,128,0,1,float16,fp8,0,0.08272533118724823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,128,0,1,fp8,fp8,0,0.07899199922879536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,128,0,1,float16,float16,0,0.08260266482830048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,128,0,1,float16,fp8,0,0.08377599716186523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,128,0,1,fp8,fp8,0,0.0786240001519521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,128,0,1,float16,float16,0,0.08298133313655853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,128,0,1,float16,fp8,0,0.08288533488909404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,128,0,1,fp8,fp8,0,0.08062399923801422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,128,0,1,float16,float16,0,0.7851520379384359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,128,0,1,float16,fp8,0,0.788586695988973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,128,0,1,fp8,fp8,0,0.6952586968739828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,128,0,1,float16,float16,0,0.791424036026001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,128,0,1,fp8,fp8,0,0.7114240328470866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,128,0,1,float16,fp8,0,0.8008373578389486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,128,0,1,float16,float16,0,0.8084800243377686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,128,0,1,float16,fp8,0,0.8160266876220703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,128,0,1,float16,float16,0,0.4428266684214274
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,128,0,1,float16,fp8,0,0.4533546765645345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,128,0,1,fp8,fp8,0,0.7240853309631348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,128,0,1,fp8,fp8,0,0.5212159951527914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,128,0,1,float16,float16,0,0.41202131907145184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,128,0,1,float16,fp8,0,0.4164160092671712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,128,0,1,fp8,fp8,0,0.3694560130437215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,128,0,1,float16,float16,0,0.5146506627400717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,128,0,1,float16,fp8,0,0.4201706647872925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,128,0,1,fp8,fp8,0,0.3738773266474406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,128,0,1,float16,float16,0,0.4270026683807373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,128,0,1,float16,fp8,0,0.4282453457514445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,128,0,1,fp8,fp8,0,0.3815893332163493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,128,0,1,float16,float16,0,0.24627200762430826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,128,0,1,float16,fp8,0,0.2515359918276469
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,128,0,1,fp8,fp8,0,0.22490666309992471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,128,0,1,float16,float16,0,0.22564266125361124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,128,0,1,float16,fp8,0,0.22554133335749307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,128,0,1,fp8,fp8,0,0.20615466435750326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,128,0,1,float16,float16,0,0.2315573294957479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,128,0,1,float16,fp8,0,0.23163199424743652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,128,0,1,fp8,fp8,0,0.2098346749941508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,128,0,1,float16,float16,0,0.2370026707649231
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,128,0,1,float16,fp8,0,0.2388319969177246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,128,0,1,fp8,fp8,0,0.21445866425832114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,128,0,1,float16,float16,0,0.14451199769973755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,128,0,1,float16,fp8,0,0.14806399742762247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,128,0,1,fp8,fp8,0,0.13453867038091025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,128,0,1,float16,float16,0,0.13036800424257913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,128,0,1,float16,fp8,0,0.13091199596722922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,128,0,1,fp8,fp8,0,0.11772800485293071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,128,0,1,float16,float16,0,0.13215999801953635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,128,0,1,float16,fp8,0,0.13121599952379862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,128,0,1,fp8,fp8,0,0.11984533071517944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,128,0,1,float16,float16,0,0.1357973317305247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,128,0,1,float16,fp8,0,0.1353600025177002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,128,0,1,fp8,fp8,0,0.12596799929936728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,128,0,1,float16,float16,0,0.08873599767684937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,128,0,1,float16,fp8,0,0.09050666292508443
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,128,0,1,fp8,fp8,0,0.08301866551240285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,128,0,1,float16,float16,0,0.08529067039489746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,128,0,1,float16,fp8,0,0.08807999889055888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,128,0,1,fp8,fp8,0,0.07915199796358745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,128,0,1,float16,float16,0,0.08669867118199666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,128,0,1,float16,fp8,0,0.08691199620564778
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,128,0,1,fp8,fp8,0,0.07943999767303467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,128,0,1,float16,float16,0,0.08666666348775227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,128,0,1,float16,fp8,0,0.08727467060089111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,128,0,1,fp8,fp8,0,0.08078399797280629
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,128,0,1,float16,float16,0,0.06649066507816315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,128,0,1,float16,fp8,0,0.06843733290831248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,128,0,1,fp8,fp8,0,0.06448000172773997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,128,0,1,float16,float16,0,0.06650133430957794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,128,0,1,float16,fp8,0,0.06655466556549072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,128,0,1,fp8,fp8,0,0.0620959997177124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,128,0,1,float16,float16,0,0.06656000018119812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,128,0,1,fp8,fp8,0,0.06243200103441874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,128,0,1,float16,fp8,0,0.06657599906126659
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,128,0,1,float16,float16,0,0.0666240006685257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,128,0,1,float16,fp8,0,0.06621866424878438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,128,0,1,fp8,fp8,0,0.0642080008983612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,128,0,1,float16,float16,0,0.8778560161590576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,128,0,1,fp8,fp8,0,0.7700533072153727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,128,0,1,float16,fp8,0,0.8805279731750488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,128,0,1,float16,float16,0,0.8941493034362793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,128,0,1,fp8,fp8,0,0.7883306344350179
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,128,0,1,float16,fp8,0,0.886298656463623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,128,0,1,float16,float16,0,0.4923093318939209
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,128,0,1,float16,float16,0,0.9104320208231608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,128,0,1,float16,fp8,0,0.9102239608764648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,128,0,1,fp8,fp8,0,0.8091146945953369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,128,0,1,float16,fp8,0,0.49831998348236084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,128,0,1,fp8,fp8,0,0.44998399416605633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,128,0,1,float16,float16,0,0.44702935218811035
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,128,0,1,float16,fp8,0,0.4474986791610718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,128,0,1,fp8,fp8,0,0.3990986744562785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,128,0,1,float16,float16,0,0.45883198579152423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,128,0,1,fp8,fp8,0,0.40782399972279865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,128,0,1,float16,fp8,0,0.46013331413269043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,128,0,1,float16,float16,0,0.4652426640192668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,128,0,1,float16,float16,0,0.2629866600036621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,128,0,1,float16,fp8,0,0.4724053144454956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,128,0,1,fp8,fp8,0,0.4211146831512451
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,128,0,1,float16,fp8,0,0.2645333409309387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,128,0,1,fp8,fp8,0,0.24249066909154257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,128,0,1,float16,float16,0,0.23772799968719482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,128,0,1,float16,fp8,0,0.23771200577418009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,128,0,1,fp8,fp8,0,0.21633066733678183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,128,0,1,float16,float16,0,0.24425599972407022
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,128,0,1,float16,fp8,0,0.24710933367411295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,128,0,1,fp8,fp8,0,0.22060799598693848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,128,0,1,float16,float16,0,0.2480106751124064
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,128,0,1,float16,fp8,0,0.253109335899353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,128,0,1,fp8,fp8,0,0.22984000047047934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,128,0,1,float16,float16,0,0.1476800044377645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,128,0,1,float16,fp8,0,0.15028267105420431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,128,0,1,fp8,fp8,0,0.1393066644668579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,128,0,1,float16,fp8,0,0.12869333227475485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,128,0,1,float16,float16,0,0.12850133577982584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,128,0,1,fp8,fp8,0,0.11754133303960164
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,128,0,1,float16,float16,0,0.13379733761151633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,128,0,1,float16,fp8,0,0.13079999883969626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,128,0,1,fp8,fp8,0,0.12743999560674033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,128,0,1,float16,float16,0,0.13793067137400308
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,128,0,1,float16,float16,0,0.08689066767692566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,128,0,1,float16,fp8,0,0.14146133263905844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,128,0,1,fp8,fp8,0,0.12983999649683634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,128,0,1,float16,fp8,0,0.0879200001557668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,128,0,1,fp8,fp8,0,0.08534399668375652
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,128,0,1,float16,float16,0,0.0823359986146291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,128,0,1,float16,fp8,0,0.08104533453782399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,128,0,1,fp8,fp8,0,0.0748533308506012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,128,0,1,float16,float16,0,0.0825493335723877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,128,0,1,float16,fp8,0,0.08287466565767924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,128,0,1,fp8,fp8,0,0.0767680009206136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,128,0,1,float16,float16,0,0.08229333162307739
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,128,0,1,float16,fp8,0,0.08462400237719218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,128,0,1,fp8,fp8,0,0.07795733213424683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,128,0,1,float16,float16,0,0.054272000988324486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,128,0,1,float16,fp8,0,0.05598400036493937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,128,0,1,fp8,fp8,0,0.05393599967161814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,128,0,1,float16,float16,0,0.05400000015894572
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,128,0,1,float16,fp8,0,0.05421866476535797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,128,0,1,fp8,fp8,0,0.05022933085759481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,128,0,1,float16,float16,0,0.053904001911481224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,128,0,1,float16,fp8,0,0.054586668809254967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,128,0,1,fp8,fp8,0,0.0518506666024526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,128,0,1,float16,float16,0,0.05572799841562907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,128,0,1,float16,fp8,0,0.05589333176612854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,128,0,1,fp8,fp8,0,0.051967998345692955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,128,0,1,float16,float16,0,0.05012799799442291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,128,0,1,float16,fp8,0,0.04987733562787374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,128,0,1,fp8,fp8,0,0.0479360024134318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,128,0,1,float16,float16,0,0.05003733436266581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,128,0,1,float16,fp8,0,0.04969066878159841
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,128,0,1,fp8,fp8,0,0.04789866507053375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,128,0,1,float16,float16,0,0.05006400247414907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,128,0,1,float16,fp8,0,0.049829334020614624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,128,0,1,fp8,fp8,0,0.04780800143877665
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,128,0,1,float16,float16,0,0.048357332746187844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,128,0,1,float16,fp8,0,0.04974400003751119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,128,0,1,fp8,fp8,0,0.04808533191680908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,128,0,1,float16,float16,0,0.6112053394317627
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,128,0,1,fp8,fp8,0,0.5402346849441528
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,128,0,1,float16,fp8,0,0.6101706822713217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,128,0,1,fp8,fp8,0,0.5525013208389282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,128,0,1,float16,float16,0,0.6224319934844971
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,128,0,1,float16,fp8,0,0.6203893423080444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,128,0,1,fp8,fp8,0,0.5619253317515055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,128,0,1,float16,float16,0,0.6303360064824423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,128,0,1,float16,fp8,0,0.6358933448791504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,128,0,1,float16,fp8,0,0.35020267963409424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,128,0,1,float16,float16,0,0.34774935245513916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,128,0,1,fp8,fp8,0,0.31962666908899945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,128,0,1,fp8,fp8,0,0.28117332855860394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,128,0,1,float16,float16,0,0.31123199065526325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,128,0,1,float16,fp8,0,0.31066666046778363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,128,0,1,float16,float16,0,0.3169013261795044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,128,0,1,fp8,fp8,0,0.2867199977238973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,128,0,1,float16,fp8,0,0.31939200560251874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,128,0,1,fp8,fp8,0,0.2935146689414978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,128,0,1,float16,float16,0,0.3229759931564331
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,128,0,1,float16,fp8,0,0.3255359927813212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,128,0,1,float16,float16,0,0.16215999921162924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,128,0,1,float16,fp8,0,0.18890132506688437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,128,0,1,fp8,fp8,0,0.17521067460378012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,128,0,1,float16,float16,0,0.18870933850606283
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,128,0,1,float16,fp8,0,0.16445866227149963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,128,0,1,fp8,fp8,0,0.15497066577275595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,128,0,1,float16,float16,0,0.17087467511494955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,128,0,1,float16,fp8,0,0.17097600301106772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,128,0,1,fp8,fp8,0,0.1574560006459554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,128,0,1,float16,float16,0,0.17520000537236533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,128,0,1,float16,fp8,0,0.17650665839513144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,128,0,1,fp8,fp8,0,0.1611840029557546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,128,0,1,float16,float16,0,0.10725333293279012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,128,0,1,float16,fp8,0,0.10644267002741496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,128,0,1,fp8,fp8,0,0.10175999999046326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,128,0,1,float16,float16,0,0.0936959981918335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,128,0,1,float16,fp8,0,0.09333333373069763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,128,0,1,fp8,fp8,0,0.0841919978459676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,128,0,1,float16,float16,0,0.09442666172981262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,128,0,1,float16,fp8,0,0.09639466802279155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,128,0,1,fp8,fp8,0,0.08530132969220479
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,128,0,1,float16,fp8,0,0.09681600332260132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,128,0,1,float16,float16,0,0.09642666578292847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,128,0,1,float16,float16,0,0.06252266466617584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,128,0,1,float16,fp8,0,0.06410133341948192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,128,0,1,fp8,fp8,0,0.05994666616121928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,128,0,1,fp8,fp8,0,0.0937546690305074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,128,0,1,float16,float16,0,0.06033066908518473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,128,0,1,float16,fp8,0,0.06039466460545858
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,128,0,1,fp8,fp8,0,0.056143999099731445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,128,0,1,float16,float16,0,0.061887999375661217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,128,0,1,float16,fp8,0,0.06145066519578298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,128,0,1,fp8,fp8,0,0.05710400144259135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,128,0,1,float16,float16,0,0.062463998794555664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,128,0,1,float16,fp8,0,0.06321600079536438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,128,0,1,float16,float16,0,0.04568000137805939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,128,0,1,fp8,fp8,0,0.0582239975531896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,128,0,1,fp8,fp8,0,0.04363200068473816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,128,0,1,float16,fp8,0,0.0461706668138504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,128,0,1,float16,float16,0,0.0439626673857371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,128,0,1,float16,fp8,0,0.04587733248869578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,128,0,1,fp8,fp8,0,0.041850666205088295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,128,0,1,float16,float16,0,0.04505600035190582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,128,0,1,float16,fp8,0,0.04535466432571411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,128,0,1,fp8,fp8,0,0.04176533222198486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,128,0,1,float16,float16,0,0.04590400060017904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,128,0,1,float16,fp8,0,0.04427200059096018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,128,0,1,fp8,fp8,0,0.04389866689840952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,128,0,1,float16,float16,0,0.0417546679576238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,128,0,1,float16,fp8,0,0.04189866781234741
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,128,0,1,fp8,fp8,0,0.03985599925120672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,128,0,1,float16,float16,0,0.041946664452552795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,128,0,1,float16,fp8,0,0.04167466859022776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,128,0,1,fp8,fp8,0,0.039493332306543984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,128,0,1,float16,float16,0,0.04188266893227895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,128,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,128,0,1,fp8,fp8,0,0.0395359992980957
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,128,0,1,float16,float16,0,0.04207466542720795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,128,0,1,float16,fp8,0,0.041840001940727234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,128,0,1,fp8,fp8,0,0.03974399964014689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,128,0,1,float16,float16,0,0.6101973454157511
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,128,0,1,float16,fp8,0,0.6074293454488119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,128,0,1,fp8,fp8,0,0.5919573307037354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,128,0,1,float16,float16,0,0.6185333331425985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,128,0,1,float16,fp8,0,0.6175520022710165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,128,0,1,fp8,fp8,0,0.6008479992548624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,128,0,1,float16,float16,0,0.6466346581776937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,128,0,1,float16,fp8,0,0.6473120053609213
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,128,0,1,fp8,fp8,0,0.6425866683324178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,128,0,1,float16,float16,0,0.35434667269388836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,128,0,1,float16,fp8,0,0.3516266743342082
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,128,0,1,fp8,fp8,0,0.34786665439605713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,128,0,1,float16,float16,0,0.3173919916152954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,128,0,1,float16,fp8,0,0.3160319924354553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,128,0,1,fp8,fp8,0,0.30587200323740643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,128,0,1,float16,float16,0,0.32267733414967853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,128,0,1,fp8,fp8,0,0.3107840021451314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,128,0,1,float16,fp8,0,0.32014399766921997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,128,0,1,float16,float16,0,0.3370506763458252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,128,0,1,float16,fp8,0,0.3331413269042969
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,128,0,1,fp8,fp8,0,0.3218613266944885
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,128,0,1,float16,float16,0,0.19075200955073038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,128,0,1,float16,fp8,0,0.19138665994008383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,128,0,1,fp8,fp8,0,0.18643200397491455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,128,0,1,float16,float16,0,0.17128000656763712
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,128,0,1,float16,fp8,0,0.17032533884048462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,128,0,1,fp8,fp8,0,0.16457600394884744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,128,0,1,fp8,fp8,0,0.16786134243011475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,128,0,1,float16,fp8,0,0.17356799046198526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,128,0,1,float16,float16,0,0.17378666003545126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,128,0,1,float16,float16,0,0.10945066809654236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,128,0,1,float16,fp8,0,0.18016533056894937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,128,0,1,fp8,fp8,0,0.17361599206924438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,128,0,1,float16,float16,0,0.18218666315078735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,128,0,1,float16,fp8,0,0.10786133011182149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,128,0,1,fp8,fp8,0,0.10519466797510783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,128,0,1,float16,float16,0,0.0971999963124593
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,128,0,1,float16,fp8,0,0.09438932935396831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,128,0,1,fp8,fp8,0,0.08876799543698628
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,128,0,1,float16,float16,0,0.09531199932098389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,128,0,1,float16,fp8,0,0.09573866923650105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,128,0,1,fp8,fp8,0,0.09170132875442505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,128,0,1,float16,float16,0,0.10218133529027303
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,128,0,1,float16,fp8,0,0.10121599833170573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,128,0,1,fp8,fp8,0,0.09851732850074768
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,128,0,1,float16,float16,0,0.06241066753864288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,128,0,1,float16,fp8,0,0.06046399970849355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,128,0,1,float16,float16,0,0.05821333328882853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,128,0,1,fp8,fp8,0,0.06286933521429698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,128,0,1,float16,fp8,0,0.058490668733914696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,128,0,1,fp8,fp8,0,0.05395199855168661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,128,0,1,float16,float16,0,0.05819733440876007
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,128,0,1,float16,fp8,0,0.05829866727193197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,128,0,1,fp8,fp8,0,0.05411200225353241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,128,0,1,float16,float16,0,0.0584853341182073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,128,0,1,float16,fp8,0,0.05996266504128774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,128,0,1,fp8,fp8,0,0.058229332168896995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,128,0,1,float16,float16,0,0.03797333439191183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,128,0,1,float16,fp8,0,0.03948266555865606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,128,0,1,fp8,fp8,0,0.03777066618204117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,128,0,1,float16,float16,0,0.03745600084463755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,128,0,1,float16,fp8,0,0.03756800045569738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,128,0,1,fp8,fp8,0,0.037530665596326195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,128,0,1,float16,float16,0,0.03772266705830892
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,128,0,1,float16,fp8,0,0.03775466730197271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,128,0,1,fp8,fp8,0,0.037733333806196846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,128,0,1,float16,float16,0,0.03770666569471359
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,128,0,1,float16,fp8,0,0.03947199881076813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,128,0,1,fp8,fp8,0,0.03786666691303253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,128,0,1,float16,float16,0,0.033546666304270424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,128,0,1,float16,fp8,0,0.03358400116364161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,128,0,1,fp8,fp8,0,0.033413333197434746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,128,0,1,float16,float16,0,0.03148799886306127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,128,0,1,float16,fp8,0,0.03329066683848699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,128,0,1,fp8,fp8,0,0.03160000095764796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,128,0,1,float16,float16,0,0.03358400116364161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,128,0,1,float16,fp8,0,0.03370666752258936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,128,0,1,fp8,fp8,0,0.031328000128269196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,128,0,1,float16,float16,0,0.03358400116364161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,128,0,1,float16,fp8,0,0.033674667278925575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,128,0,1,fp8,fp8,0,0.033439998825391136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,128,0,1,float16,float16,0,0.02958933264017105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,128,0,1,float16,fp8,0,0.03149333347876867
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,128,0,1,fp8,fp8,0,0.029109333952267964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,128,0,1,float16,float16,0,0.029626667499542236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,128,0,1,float16,fp8,0,0.02939733366171519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,128,0,1,fp8,fp8,0,0.029253333806991577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,128,0,1,float16,float16,0,0.029530666768550873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,128,0,1,float16,fp8,0,0.03127466638882955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,128,0,1,fp8,fp8,0,0.027530667682488758
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,128,0,1,float16,float16,0,0.03123733401298523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,128,0,1,float16,fp8,0,0.02956266701221466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,128,0,1,fp8,fp8,0,0.029418667157491047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,128,0,1,float16,float16,0,0.5211840073267618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,128,0,1,float16,fp8,0,0.5127840042114258
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,128,0,1,fp8,fp8,0,0.5020159880320231
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,128,0,1,float16,float16,0,0.5341653426488241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,128,0,1,fp8,fp8,0,0.5110559860865275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,128,0,1,float16,fp8,0,0.5242826541264852
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,128,0,1,float16,fp8,0,0.5562826792399088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,128,0,1,float16,float16,0,0.5610613425572714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,128,0,1,fp8,fp8,0,0.568064014116923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,128,0,1,float16,fp8,0,0.30204800764719647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,128,0,1,float16,float16,0,0.3049226601918538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,128,0,1,fp8,fp8,0,0.3044053316116333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,128,0,1,float16,float16,0,0.2696640094121297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,128,0,1,float16,fp8,0,0.2664373318354289
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,128,0,1,fp8,fp8,0,0.25914132595062256
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,128,0,1,float16,float16,0,0.27689599990844727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,128,0,1,float16,fp8,0,0.27434666951497394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,128,0,1,fp8,fp8,0,0.26633600393931073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,128,0,1,float16,float16,0,0.2898826599121094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,128,0,1,float16,fp8,0,0.28454933563868207
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,128,0,1,fp8,fp8,0,0.2825813293457031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,128,0,1,float16,float16,0,0.1661066710948944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,128,0,1,float16,fp8,0,0.16107733050982156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,128,0,1,fp8,fp8,0,0.16531733671824136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,128,0,1,float16,float16,0,0.14587199687957764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,128,0,1,float16,fp8,0,0.14406399925549826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,128,0,1,fp8,fp8,0,0.13826666275660196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,128,0,1,float16,float16,0,0.1497599979241689
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,128,0,1,float16,fp8,0,0.14735999703407288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,128,0,1,fp8,fp8,0,0.14164800445238748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,128,0,1,float16,float16,0,0.1562933325767517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,128,0,1,float16,fp8,0,0.15506133437156677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,128,0,1,fp8,fp8,0,0.15027733643849692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,128,0,1,float16,fp8,0,0.09139200051625569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,128,0,1,float16,float16,0,0.09488532940546672
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,128,0,1,fp8,fp8,0,0.09271466732025146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,128,0,1,float16,fp8,0,0.07710400223731995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,128,0,1,fp8,fp8,0,0.07444799939791362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,128,0,1,float16,float16,0,0.08077866832415263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,128,0,1,float16,float16,0,0.07913599908351898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,128,0,1,float16,fp8,0,0.08054399987061818
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,128,0,1,fp8,fp8,0,0.07876266539096832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,128,0,1,float16,float16,0,0.08518933256467183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,128,0,1,float16,fp8,0,0.08616000413894653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,128,0,1,float16,float16,0,0.051957334081331887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,128,0,1,float16,fp8,0,0.05006400247414907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,128,0,1,fp8,fp8,0,0.08598933617273967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,128,0,1,float16,float16,0,0.049813335140546165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,128,0,1,fp8,fp8,0,0.053786665201187134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,128,0,1,float16,fp8,0,0.04833066463470459
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,128,0,1,fp8,fp8,0,0.04576000074545542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,128,0,1,float16,float16,0,0.050000001986821495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,128,0,1,fp8,fp8,0,0.04614399870236715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,128,0,1,float16,float16,0,0.05008000135421753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,128,0,1,float16,fp8,0,0.05050666630268097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,128,0,1,float16,fp8,0,0.04996266464392344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,128,0,1,float16,float16,0,0.03336533407370249
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,128,0,1,fp8,fp8,0,0.04978133241335551
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,128,0,1,float16,fp8,0,0.03324799984693527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,128,0,1,fp8,fp8,0,0.03341866781314214
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,128,0,1,float16,float16,0,0.03242666771014532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,128,0,1,float16,fp8,0,0.0315786674618721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,128,0,1,fp8,fp8,0,0.03123733401298523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,128,0,1,float16,float16,0,0.03326933334271113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,128,0,1,float16,fp8,0,0.033370666205883026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,128,0,1,fp8,fp8,0,0.03136533250411352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,128,0,1,float16,float16,0,0.03179199993610382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,128,0,1,float16,fp8,0,0.03345600018898646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,128,0,1,fp8,fp8,0,0.03378133227427801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,128,0,1,float16,float16,0,0.027424000203609467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,128,0,1,float16,fp8,0,0.027263998985290527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,128,0,1,fp8,fp8,0,0.027610667049884796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,128,0,1,float16,float16,0,0.025568000972270966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,128,0,1,float16,fp8,0,0.027552001178264618
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,128,0,1,fp8,fp8,0,0.025413334369659424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,128,0,1,float16,float16,0,0.027466667195161183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,128,0,1,float16,fp8,0,0.02731200059254964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,128,0,1,fp8,fp8,0,0.025381334125995636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,128,0,1,float16,float16,0,0.02773333340883255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,128,0,1,float16,fp8,0,0.027488000690937042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,128,0,1,fp8,fp8,0,0.026330667237440746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,128,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,128,0,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,128,0,1,fp8,fp8,0,0.02347733328739802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,128,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,128,0,1,float16,fp8,0,0.0232640008131663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,128,0,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,128,0,1,float16,float16,0,0.025397333006064098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,128,0,1,float16,fp8,0,0.023397333920001984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,128,0,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,128,0,1,float16,float16,0,0.023232000569502514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,128,0,1,float16,fp8,0,0.025445332129796345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,128,0,1,fp8,fp8,0,0.023082666099071503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,128,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,128,0,1,float16,fp8,0,0.023413332800070446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,128,0,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,128,0,1,float16,float16,0,0.02347733328739802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,128,0,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,128,0,1,fp8,fp8,0,0.023103999594847362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,128,0,1,float16,float16,0,0.02366400013367335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,128,0,1,float16,fp8,0,0.023408000667889912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,128,0,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,128,0,1,float16,float16,0,0.023376000424226124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,128,0,1,float16,fp8,0,0.024549332757790882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,128,0,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,128,0,1,float16,float16,0,0.2363413373629252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,128,0,1,float16,fp8,0,0.23377599318822226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,128,0,1,fp8,fp8,0,0.24252800146738687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,128,0,1,float16,float16,0,0.24566400051116943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,128,0,1,float16,fp8,0,0.23858133951822916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,128,0,1,fp8,fp8,0,0.24634132782618204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,128,0,1,float16,float16,0,0.2654719948768616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,128,0,1,float16,fp8,0,0.25632532437642414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,128,0,1,fp8,fp8,0,0.2582613428433736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,128,0,1,float16,float16,0,0.15243200461069742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,128,0,1,float16,fp8,0,0.14967466394106546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,128,0,1,fp8,fp8,0,0.1513920029004415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,128,0,1,float16,float16,0,0.12681066989898682
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,128,0,1,float16,fp8,0,0.12441600362459819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,128,0,1,fp8,fp8,0,0.12781866391499838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,128,0,1,float16,float16,0,0.13205333550771078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,128,0,1,float16,fp8,0,0.12877866625785828
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,128,0,1,fp8,fp8,0,0.13082666198412576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,128,0,1,float16,float16,0,0.13961600263913473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,128,0,1,float16,fp8,0,0.13859200477600098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,128,0,1,fp8,fp8,0,0.13732266426086426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,128,0,1,float16,float16,0,0.08935466408729553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,128,0,1,float16,fp8,0,0.08553066849708557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,128,0,1,fp8,fp8,0,0.08700266480445862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,128,0,1,float16,float16,0,0.07125333448251088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,128,0,1,float16,fp8,0,0.07081066568692525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,128,0,1,fp8,fp8,0,0.07029866675535838
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,128,0,1,float16,float16,0,0.07392000158627827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,128,0,1,float16,fp8,0,0.07182399928569794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,128,0,1,fp8,fp8,0,0.0724373310804367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,128,0,1,float16,float16,0,0.07961600025494893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,128,0,1,float16,fp8,0,0.07737066845099132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,128,0,1,fp8,fp8,0,0.07874133189519246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,128,0,1,float16,float16,0,0.0459146648645401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,128,0,1,float16,fp8,0,0.04423999786376953
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,128,0,1,fp8,fp8,0,0.047194664676984154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,128,0,1,float16,float16,0,0.04180799921353658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,128,0,1,float16,fp8,0,0.04293866455554962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,128,0,1,fp8,fp8,0,0.03979733337958654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,128,0,1,float16,float16,0,0.04377066592375437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,128,0,1,float16,fp8,0,0.04382933179537455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,128,0,1,fp8,fp8,0,0.041690667470296226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,128,0,1,float16,float16,0,0.0459146648645401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,128,0,1,float16,fp8,0,0.04438399771849314
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,128,0,1,fp8,fp8,0,0.045093332727750145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,128,0,1,float16,float16,0,0.03125333289305369
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,128,0,1,float16,fp8,0,0.029482667644818623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,128,0,1,fp8,fp8,0,0.03140799949566523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,128,0,1,float16,float16,0,0.02958933264017105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,128,0,1,float16,fp8,0,0.029498666524887085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,128,0,1,fp8,fp8,0,0.02757333219051361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,128,0,1,float16,float16,0,0.029285334050655365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,128,0,1,float16,fp8,0,0.029520000020662945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,128,0,1,fp8,fp8,0,0.029343999922275543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,128,0,1,float16,float16,0,0.030042665700117748
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,128,0,1,float16,fp8,0,0.03133866687615713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,128,0,1,fp8,fp8,0,0.03136000037193298
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,128,0,1,float16,float16,0,0.025248001019159954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,128,0,1,fp8,fp8,0,0.02369600037733714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,128,0,1,float16,fp8,0,0.025674665967623394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,128,0,1,float16,float16,0,0.025173333783944447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,128,0,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,128,0,1,fp8,fp8,0,0.022970666488011677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,128,0,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,128,0,1,fp8,fp8,0,0.023365333676338196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,128,0,1,float16,float16,0,0.025578667720158894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,128,0,1,float16,float16,0,0.023455999791622162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,128,0,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,128,0,1,fp8,fp8,0,0.02349333216746648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,128,0,1,float16,fp8,0,0.021546666820844013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,128,0,1,float16,float16,0,0.021216000119845074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,128,0,1,fp8,fp8,0,0.020992000897725422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,128,0,1,float16,float16,0,0.02110933264096578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,128,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,128,0,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,128,0,1,float16,float16,0,0.021130666136741638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,128,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,128,0,1,fp8,fp8,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,128,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,128,0,1,float16,fp8,0,0.021477334201335907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,128,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,128,0,1,float16,float16,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,128,0,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,128,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,128,0,1,float16,float16,0,0.02120000123977661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,128,0,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,128,0,1,float16,float16,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,128,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,128,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,128,0,1,float16,float16,0,0.019946667055288952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,128,0,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,128,0,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,128,0,1,float16,float16,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,128,0,1,float16,fp8,0,0.021322667598724365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,128,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,128,0,1,float16,float16,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,128,0,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,128,0,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,128,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,128,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,128,0,1,fp8,fp8,0,0.019717333217461903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,128,0,1,float16,float16,0,0.01942933350801468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,128,0,1,float16,fp8,0,0.01940800001223882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,128,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,128,0,1,float16,fp8,0,0.13422933220863342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,128,0,1,float16,float16,0,0.13460800051689148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,128,0,1,fp8,fp8,0,0.13450666268666586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,128,0,1,float16,float16,0,0.1362613340218862
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,128,0,1,float16,fp8,0,0.1363146702448527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,128,0,1,fp8,fp8,0,0.13792533675829569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,128,0,1,float16,float16,0,0.14412800470987955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,128,0,1,float16,fp8,0,0.1430346667766571
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,128,0,1,fp8,fp8,0,0.14671466747919717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,128,0,1,float16,float16,0,0.08752533793449402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,128,0,1,float16,fp8,0,0.08410132924715678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,128,0,1,fp8,fp8,0,0.08888000249862671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,128,0,1,float16,fp8,0,0.07329600056012471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,128,0,1,float16,float16,0,0.07457600037256877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,128,0,1,fp8,fp8,0,0.0730453332265218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,128,0,1,float16,float16,0,0.07548266649246216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,128,0,1,float16,fp8,0,0.07383466760317485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,128,0,1,fp8,fp8,0,0.07695466776688893
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,128,0,1,float16,float16,0,0.0788373351097107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,128,0,1,float16,fp8,0,0.0788373351097107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,128,0,1,fp8,fp8,0,0.08214933176835378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,128,0,1,float16,float16,0,0.04773333172003428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,128,0,1,float16,fp8,0,0.045941332976023354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,128,0,1,fp8,fp8,0,0.04876266419887543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,128,0,1,float16,float16,0,0.04608533283074697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,128,0,1,float16,fp8,0,0.04569066564242045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,128,0,1,fp8,fp8,0,0.04366933306058248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,128,0,1,float16,float16,0,0.04588800172011057
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,128,0,1,float16,fp8,0,0.04604800045490265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,128,0,1,fp8,fp8,0,0.04398400088151296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,128,0,1,float16,float16,0,0.045797333121299744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,128,0,1,float16,fp8,0,0.045797333121299744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,128,0,1,fp8,fp8,0,0.046053335070610046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,128,0,1,float16,float16,0,0.031040000418821972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,128,0,1,float16,fp8,0,0.03150933235883713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,128,0,1,fp8,fp8,0,0.03148266673088074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,128,0,1,float16,float16,0,0.029317334294319153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,128,0,1,float16,fp8,0,0.029525332152843475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,128,0,1,fp8,fp8,0,0.02958400050799052
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,128,0,1,float16,float16,0,0.02943466603755951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,128,0,1,float16,fp8,0,0.029290666182835896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,128,0,1,fp8,fp8,0,0.029685333371162415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,128,0,1,float16,float16,0,0.03182933231194814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,128,0,1,float16,fp8,0,0.03148799886306127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,128,0,1,fp8,fp8,0,0.03139200061559677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,128,0,1,float16,float16,0,0.02327999969323476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,128,0,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,128,0,1,fp8,fp8,0,0.023168000082174938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,128,0,1,float16,float16,0,0.02109333376089732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,128,0,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,128,0,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,128,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,128,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,128,0,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,128,0,1,float16,float16,0,0.022858666876951855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,128,0,1,float16,fp8,0,0.021322667598724365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,128,0,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,128,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,128,0,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,128,0,1,fp8,fp8,0,0.01947733387351036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,128,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,128,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,128,0,1,fp8,fp8,0,0.01937599976857503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,128,0,1,float16,float16,0,0.019199999670187633
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,128,0,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,128,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,128,0,1,float16,float16,0,0.019493332753578823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,128,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,128,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,128,0,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,128,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,128,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,128,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,128,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,128,0,1,float16,fp8,0,0.01871466636657715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,128,0,1,float16,float16,0,0.017423999806245167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,128,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,128,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,128,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,128,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,128,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,128,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,128,0,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,128,0,1,float16,fp8,0,0.01798933371901512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,128,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,128,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,128,0,1,float16,fp8,0,0.017685333887736004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,128,0,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,128,0,1,float16,float16,0,0.09494400024414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,128,0,1,float16,fp8,0,0.0923520028591156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,128,0,1,fp8,fp8,0,0.08945066730181377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,128,0,1,float16,float16,0,0.09476799766222636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,128,0,1,float16,fp8,0,0.09340266386667888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,128,0,1,fp8,fp8,0,0.09340266386667888
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,128,0,1,float16,float16,0,0.09689066807428996
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,128,0,1,float16,fp8,0,0.09745599826176961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,128,0,1,fp8,fp8,0,0.09948800007502238
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,128,0,1,float16,float16,0,0.05584000051021576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,128,0,1,float16,fp8,0,0.054527997970581055
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,128,0,1,fp8,fp8,0,0.0558186670144399
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,128,0,1,float16,float16,0,0.05229333539803823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,128,0,1,float16,fp8,0,0.053183997670809426
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,128,0,1,fp8,fp8,0,0.050160000721613564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,128,0,1,float16,float16,0,0.05402133365472158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,128,0,1,float16,fp8,0,0.05260799825191498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,128,0,1,fp8,fp8,0,0.05008000135421753
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,128,0,1,float16,float16,0,0.0545653353134791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,128,0,1,float16,fp8,0,0.052383999029795326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,128,0,1,fp8,fp8,0,0.0539680023988088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,128,0,1,float16,float16,0,0.03711466739575068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,128,0,1,float16,fp8,0,0.03565333286921183
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,128,0,1,fp8,fp8,0,0.035962666074434914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,128,0,1,float16,float16,0,0.035461333890755974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,128,0,1,float16,fp8,0,0.035530666510264076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,128,0,1,fp8,fp8,0,0.033376000821590424
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,128,0,1,float16,float16,0,0.035375999907652535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,128,0,1,float16,fp8,0,0.035429333647092186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,128,0,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,128,0,1,float16,float16,0,0.03559466699759165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,128,0,1,float16,fp8,0,0.03581333408753077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,128,0,1,float16,float16,0,0.025120000044504803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,128,0,1,fp8,fp8,0,0.03532800078392029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,128,0,1,float16,fp8,0,0.025205334027608235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,128,0,1,float16,float16,0,0.025066666305065155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,128,0,1,fp8,fp8,0,0.025349333882331848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,128,0,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,128,0,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,128,0,1,float16,float16,0,0.02325333406527837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,128,0,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,128,0,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,128,0,1,float16,float16,0,0.023610666394233704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,128,0,1,float16,fp8,0,0.025205334027608235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,128,0,1,fp8,fp8,0,0.02369600037733714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,128,0,1,float16,float16,0,0.019215999792019527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,128,0,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,128,0,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,128,0,1,float16,float16,0,0.017509333789348602
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,128,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,128,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,128,0,1,float16,float16,0,0.019274666905403137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,128,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,128,0,1,float16,float16,0,0.018778666853904724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,128,0,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,128,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,128,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,128,0,1,float16,float16,0,0.017386666188637417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,128,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,128,0,1,float16,fp8,0,0.01817600056529045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,128,0,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,128,0,1,float16,float16,0,0.017866666118303936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,128,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,128,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,128,0,1,fp8,fp8,0,0.018320000420014065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,128,0,1,float16,float16,0,0.017573333034912746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,128,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,128,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,128,0,1,float16,float16,0,0.016469333320856094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,128,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,128,0,1,fp8,fp8,0,0.016010666886965435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,128,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,128,0,1,float16,fp8,0,0.01758933315674464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,128,0,1,fp8,fp8,0,0.017498667041460674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,128,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,128,0,1,float16,float16,0,0.07268266876538594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,128,0,1,float16,fp8,0,0.07041599849859874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,128,0,1,fp8,fp8,0,0.06692266464233398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,128,0,1,float16,float16,0,0.07221866647402446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,128,0,1,float16,fp8,0,0.07108266651630402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,128,0,1,fp8,fp8,0,0.06854400038719177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,128,0,1,float16,float16,0,0.07264000177383423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,128,0,1,float16,fp8,0,0.07234133283297221
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,128,0,1,float16,float16,0,0.04345599810282389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,128,0,1,float16,fp8,0,0.043578664461771645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,128,0,1,fp8,fp8,0,0.07031466563542683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,128,0,1,float16,float16,0,0.04196266829967499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,128,0,1,float16,fp8,0,0.043840001026789345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,128,0,1,fp8,fp8,0,0.04378666480382284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,128,0,1,fp8,fp8,0,0.041797334949175514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,128,0,1,float16,float16,0,0.043765331308046974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,128,0,1,float16,fp8,0,0.04177066683769226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,128,0,1,fp8,fp8,0,0.041696002086003624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,128,0,1,float16,float16,0,0.043653334180514015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,128,0,1,float16,fp8,0,0.04254400233427683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,128,0,1,float16,float16,0,0.029663999875386555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,128,0,1,float16,fp8,0,0.029509333272775013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,128,0,1,fp8,fp8,0,0.043824002146720886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,128,0,1,fp8,fp8,0,0.029301332930723827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,128,0,1,float16,float16,0,0.027589333554108936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,128,0,1,float16,fp8,0,0.02932800104220708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,128,0,1,fp8,fp8,0,0.027776000400384266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,128,0,1,float16,float16,0,0.029530666768550873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,128,0,1,float16,fp8,0,0.02924266705910365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,128,0,1,fp8,fp8,0,0.02938666691382726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,128,0,1,float16,fp8,0,0.02924799919128418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,128,0,1,float16,float16,0,0.02939733366171519
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,128,0,1,float16,float16,0,0.020949333906173706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,128,0,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,128,0,1,fp8,fp8,0,0.029535998900731403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,128,0,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,128,0,1,float16,float16,0,0.019610666980346043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,128,0,1,float16,fp8,0,0.02092266579469045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,128,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,128,0,1,float16,float16,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,128,0,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,128,0,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,128,0,1,float16,float16,0,0.021125334004561108
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,128,0,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,128,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,128,0,1,float16,fp8,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,128,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,128,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,128,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,128,0,1,float16,fp8,0,0.017583999782800674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,128,0,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,128,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,128,0,1,float16,float16,0,0.016650666793187458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,128,0,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,128,0,1,float16,float16,0,0.018016000588734944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,128,0,1,float16,fp8,0,0.01674666628241539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,128,0,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,128,0,1,fp8,fp8,0,0.017605333278576534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,128,0,1,float16,float16,0,0.01676799977819125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,128,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,128,0,1,float16,float16,0,0.01793066660563151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,128,0,1,fp8,fp8,0,0.016501333564519882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,128,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,128,0,1,float16,fp8,0,0.017840000490347546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,128,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,128,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,128,0,1,fp8,fp8,0,0.017535999417304993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,128,0,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,128,0,1,float16,fp8,0,0.016693333784739178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,128,0,1,float16,float16,0,0.01747200017174085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,128,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,128,0,1,float16,fp8,0,0.017498667041460674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,128,0,1,fp8,fp8,0,0.0164533331990242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,128,0,1,float16,float16,0,0.017386666188637417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,128,0,1,float16,fp8,0,0.016528000434239704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,128,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,128,0,1,fp8,fp8,0,0.015647999942302704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,128,0,1,float16,fp8,0,0.01573333392540614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,128,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,128,0,1,fp8,fp8,0,0.016224000602960587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,128,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,128,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,128,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,128,0,1,float16,float16,0,0.06268266836802165
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,128,0,1,float16,fp8,0,0.06204266846179962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,128,0,1,fp8,fp8,0,0.05835199852784475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,128,0,1,float16,fp8,0,0.061941335598627724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,128,0,1,float16,float16,0,0.0628053347269694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,128,0,1,fp8,fp8,0,0.05946666498978933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,128,0,1,float16,float16,0,0.06308799982070923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,128,0,1,fp8,fp8,0,0.06243733565012614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,128,0,1,float16,fp8,0,0.06283733248710632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,128,0,1,float16,float16,0,0.03782399992148081
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,128,0,1,float16,fp8,0,0.037685332198937736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,128,0,1,fp8,fp8,0,0.03815466662247976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,128,0,1,float16,float16,0,0.03663466622432073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,128,0,1,float16,fp8,0,0.03751466671625773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,128,0,1,fp8,fp8,0,0.03589333345492681
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,128,0,1,float16,float16,0,0.03700266778469086
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,128,0,1,float16,fp8,0,0.0378560001651446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,128,0,1,fp8,fp8,0,0.03608533243338267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,128,0,1,float16,float16,0,0.03755733370780945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,128,0,1,float16,fp8,0,0.03755199909210205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,128,0,1,fp8,fp8,0,0.03759466608365377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,128,0,1,float16,float16,0,0.027434666951497395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,128,0,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,128,0,1,fp8,fp8,0,0.027285332481066387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,128,0,1,float16,float16,0,0.026928000152111053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,128,0,1,float16,fp8,0,0.027327999472618103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,128,0,1,fp8,fp8,0,0.02536533276240031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,128,0,1,float16,float16,0,0.025386666258176167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,128,0,1,float16,fp8,0,0.027301333844661713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,128,0,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,128,0,1,float16,float16,0,0.025402667621771496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,128,0,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,128,0,1,fp8,fp8,0,0.02571200082699458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,128,0,1,float16,float16,0,0.020949333906173706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,128,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,128,0,1,fp8,fp8,0,0.01966399947802226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,128,0,1,float16,float16,0,0.020682666450738907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,128,0,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,128,0,1,fp8,fp8,0,0.021013334393501282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,128,0,1,float16,float16,0,0.02056533346573512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,128,0,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,128,0,1,fp8,fp8,0,0.020960000654061634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,128,0,1,float16,float16,0,0.02029866725206375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,128,0,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,128,0,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,128,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,128,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,128,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,128,0,1,fp8,fp8,0,0.018650667121013004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,128,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,128,0,1,float16,float16,0,0.015487999965747198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,128,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,128,0,1,fp8,fp8,0,0.016255999604860943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,128,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,128,0,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,128,0,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,128,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,128,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,128,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,128,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,128,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,128,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,128,0,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,128,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,128,0,1,float16,fp8,0,0.016154666741689045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,128,0,1,float16,fp8,0,0.015423999478419622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,128,0,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,128,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,128,0,1,fp8,fp8,0,0.015642666568358738
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,128,0,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,128,0,1,fp8,fp8,0,0.015882667154073715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,128,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,128,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,128,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,128,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,128,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,128,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,128,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,128,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,128,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,0,0.05349333087603251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,0,0.05386666456858317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,128,0,1,fp8,fp8,0,0.05171200136343638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,0,0.05418133238951365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,0,0.05193066596984863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,128,0,1,fp8,fp8,0,0.05036266644795736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,0,0.054383998115857445
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,0,0.05198400219281515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,128,0,1,fp8,fp8,0,0.05004799862702688
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,0,0.033589333295822144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,0,0.03344533344109853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,128,0,1,fp8,fp8,0,0.031856000423431396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,0,0.03333866596221924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,0,0.03376533339420954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,128,0,1,fp8,fp8,0,0.03162133445342382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,0,0.03346133232116699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,0,0.03346133232116699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,128,0,1,fp8,fp8,0,0.03350933392842611
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,0,0.03379733363787333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,0,0.03383466601371765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,128,0,1,fp8,fp8,0,0.03321066747109095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,0,0.02516799916823705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,0,0.023397333920001984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,128,0,1,fp8,fp8,0,0.02345066765944163
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,0,0.025194667279720306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,0,0.02516799916823705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,128,0,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,0,0.025125332176685333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,0,0.025392000873883564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,128,0,1,fp8,fp8,0,0.025290665527184803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,0,0.025493333737055462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,0,0.02532266577084859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,128,0,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,0,0.02128000060717265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,128,0,1,fp8,fp8,0,0.020960000654061634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,0,0.021930667261282604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,128,0,1,fp8,fp8,0,0.02109333376089732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,0,0.02346133440732956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,0,0.023306667804718018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,128,0,1,fp8,fp8,0,0.020970667401949566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,0,0.02141333371400833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,128,0,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,128,0,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,128,0,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,0,0.018922666708628338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,0,0.018895999838908512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,0,0.015562667200962702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,128,0,1,fp8,fp8,0,0.01770666614174843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,128,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,128,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,128,0,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,128,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,0,0.016677333662907284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,128,0,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,128,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,128,0,1,fp8,fp8,0,0.015461333096027374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,128,0,1,fp8,fp8,0,0.015552000453074774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,0,0.01599466676513354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,128,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,0,0.01562133307258288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,128,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,0,0.015594666202863058
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,128,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,128,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,128,0,1,fp8,fp8,0,1.4832852681477864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,128,0,1,float16,fp8,0,1.741978645324707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,128,0,1,float16,float16,0,1.9647146860758464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,128,0,1,fp8,fp8,0,1.4973386128743489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,128,0,1,float16,float16,0,2.4718079566955566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,128,0,1,float16,fp8,0,2.3479040463765464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,128,0,1,float16,float16,0,0.9626133441925049
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,128,0,1,float16,fp8,0,0.9652907053629557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,128,0,1,fp8,fp8,0,0.8369173208872477
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,128,0,1,float16,float16,0,0.9547893206278483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,128,0,1,float16,fp8,0,1.0102186997731526
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,128,0,1,fp8,fp8,0,0.820576032002767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,128,0,1,float16,float16,0,0.9426720142364502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,128,0,1,float16,fp8,0,0.975056012471517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,128,0,1,float16,float16,0,0.565775990486145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,128,0,1,float16,fp8,0,0.5622719923655192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,128,0,1,fp8,fp8,0,0.9599093596140543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,128,0,1,fp8,fp8,0,0.4978293180465698
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,128,0,1,float16,float16,0,0.5523253281911215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,128,0,1,float16,fp8,0,0.5470186471939087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,128,0,1,fp8,fp8,0,0.480677326520284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,128,0,1,float16,float16,0,0.5566453138987223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,128,0,1,fp8,fp8,0,0.4878773291905721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,128,0,1,float16,fp8,0,0.5571839809417725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,128,0,1,float16,float16,0,0.35627734661102295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,128,0,1,float16,fp8,0,0.35782400767008465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,128,0,1,fp8,fp8,0,0.32079466183980304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,128,0,1,float16,float16,0,0.35210665067036945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,128,0,1,float16,fp8,0,0.3548693259557088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,128,0,1,fp8,fp8,0,0.3142559925715129
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,128,0,1,float16,float16,0,0.3521440029144287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,128,0,1,float16,fp8,0,0.35389868418375653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,128,0,1,fp8,fp8,0,0.31700267394383747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,128,0,1,float16,float16,0,1.0643786589304607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,128,0,1,fp8,fp8,0,0.9180693626403809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,128,0,1,float16,fp8,0,1.0523200035095215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,128,0,1,float16,float16,0,1.06058136622111
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,128,0,1,float16,float16,0,0.7244640191396078
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,128,0,1,float16,fp8,0,1.0757226943969727
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,128,0,1,float16,fp8,0,0.7038613160451254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,128,0,1,fp8,fp8,0,0.9247999986012777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,128,0,1,fp8,fp8,0,0.5337546666463217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,128,0,1,float16,fp8,0,0.5899519920349121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,128,0,1,float16,float16,0,0.5945333242416382
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,128,0,1,fp8,fp8,0,0.5177599986394247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,128,0,1,float16,float16,0,0.5977439880371094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,128,0,1,float16,float16,0,0.3651520013809204
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,128,0,1,fp8,fp8,0,0.5265599886576334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,128,0,1,float16,fp8,0,0.5950239896774292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,128,0,1,fp8,fp8,0,0.32638933261235553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,128,0,1,float16,fp8,0,0.36477335294087726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,128,0,1,float16,float16,0,0.3524693250656128
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,128,0,1,float16,fp8,0,0.348197340965271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,128,0,1,fp8,fp8,0,0.31197865804036456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,128,0,1,float16,float16,0,0.35364798704783124
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,128,0,1,float16,fp8,0,0.35602664947509766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,128,0,1,fp8,fp8,0,0.3160960078239441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,128,0,1,float16,float16,0,0.23194666703542074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,128,0,1,float16,fp8,0,0.2306506633758545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,128,0,1,fp8,fp8,0,0.21424533923467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,128,0,1,float16,float16,0,0.229477326075236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,128,0,1,float16,fp8,0,0.23063466946283975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,128,0,1,fp8,fp8,0,0.2085919976234436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,128,0,1,float16,float16,0,0.22986666361490884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,128,0,1,float16,fp8,0,0.23265065749486288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,128,0,1,fp8,fp8,0,0.2111519972483317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,128,0,1,float16,float16,0,0.7805866400400797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,128,0,1,float16,fp8,0,0.7798720200856527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,128,0,1,fp8,fp8,0,0.6817333698272705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,128,0,1,float16,float16,0,0.9839786688486735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,128,0,1,fp8,fp8,0,0.6909706592559814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,128,0,1,float16,fp8,0,0.7885493437449137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,128,0,1,float16,float16,0,0.4591199954350789
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,128,0,1,fp8,fp8,0,0.40696533521016437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,128,0,1,float16,fp8,0,0.46397864818573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,128,0,1,float16,float16,0,0.44170665740966797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,128,0,1,fp8,fp8,0,0.3964000145594279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,128,0,1,float16,fp8,0,0.44413332144419354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,128,0,1,fp8,fp8,0,0.3963786760965983
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,128,0,1,float16,fp8,0,0.4520266850789388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,128,0,1,float16,float16,0,0.4524906476338704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,128,0,1,float16,float16,0,0.2819146712621053
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,128,0,1,fp8,fp8,0,0.2561546762784322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,128,0,1,float16,fp8,0,0.2858346700668335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,128,0,1,fp8,fp8,0,0.242959996064504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,128,0,1,float16,float16,0,0.2736053268114726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,128,0,1,float16,fp8,0,0.2730773289998372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,128,0,1,fp8,fp8,0,0.24393600225448608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,128,0,1,float16,float16,0,0.27581334114074707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,128,0,1,float16,fp8,0,0.27632532517115277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,128,0,1,float16,float16,0,0.17197332779566446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,128,0,1,fp8,fp8,0,0.15844800074895224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,128,0,1,float16,fp8,0,0.17620267470677695
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,128,0,1,fp8,fp8,0,0.15558399756749472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,128,0,1,float16,fp8,0,0.1718133290608724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,128,0,1,float16,float16,0,0.1723733345667521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,128,0,1,float16,fp8,0,0.17449599504470825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,128,0,1,float16,float16,0,0.17342400550842285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,128,0,1,fp8,fp8,0,0.15658666690190634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,128,0,1,float16,float16,0,1.0026986598968506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,128,0,1,fp8,fp8,0,0.8733759721120199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,128,0,1,float16,fp8,0,1.0101280212402344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,128,0,1,float16,float16,0,1.009178638458252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,128,0,1,float16,float16,0,0.5728853146235148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,128,0,1,float16,fp8,0,1.0221599737803142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,128,0,1,float16,fp8,0,0.5702773332595825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,128,0,1,fp8,fp8,0,0.8860373497009277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,128,0,1,fp8,fp8,0,0.5045919815699259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,128,0,1,float16,float16,0,0.5548586845397949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,128,0,1,float16,fp8,0,0.5505333344141642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,128,0,1,fp8,fp8,0,0.4787466526031494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,128,0,1,float16,float16,0,0.5576586723327637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,128,0,1,fp8,fp8,0,0.48968533674875897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,128,0,1,float16,float16,0,0.3280373414357503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,128,0,1,float16,fp8,0,0.5560213327407837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,128,0,1,float16,fp8,0,0.33075199524561566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,128,0,1,fp8,fp8,0,0.297760009765625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,128,0,1,float16,float16,0,0.3115626573562622
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,128,0,1,float16,fp8,0,0.31175466378529865
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,128,0,1,fp8,fp8,0,0.28353599707285565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,128,0,1,float16,float16,0,0.3221120039621989
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,128,0,1,fp8,fp8,0,0.28540267546971637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,128,0,1,float16,fp8,0,0.3181440035502116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,128,0,1,float16,float16,0,0.2065760095914205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,128,0,1,float16,fp8,0,0.21192532777786255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,128,0,1,fp8,fp8,0,0.18622400363286337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,128,0,1,float16,float16,0,0.1995840072631836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,128,0,1,float16,fp8,0,0.1987946629524231
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,128,0,1,fp8,fp8,0,0.1820533275604248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,128,0,1,float16,float16,0,0.20037333170572916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,128,0,1,float16,fp8,0,0.20140800873438516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,128,0,1,fp8,fp8,0,0.18252267440160116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,128,0,1,float16,float16,0,0.13221333424250284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,128,0,1,float16,fp8,0,0.13199466466903687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,128,0,1,fp8,fp8,0,0.12388267119725545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,128,0,1,float16,float16,0,0.13244266311327615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,128,0,1,float16,fp8,0,0.132042666276296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,128,0,1,fp8,fp8,0,0.12020799517631531
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,128,0,1,float16,float16,0,0.1322986682256063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,128,0,1,float16,fp8,0,0.13341333468755087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,128,0,1,fp8,fp8,0,0.12227200468381245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,128,0,1,float16,float16,0,0.6360640128453573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,128,0,1,float16,fp8,0,0.6347466707229614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,128,0,1,fp8,fp8,0,0.5598400036493937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,128,0,1,float16,float16,0,0.6398826837539673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,128,0,1,float16,float16,0,0.37382400035858154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,128,0,1,fp8,fp8,0,0.5693440039952596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,128,0,1,float16,fp8,0,0.37223466237386066
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,128,0,1,float16,fp8,0,0.6415253480275472
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,128,0,1,fp8,fp8,0,0.32975999514261883
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,128,0,1,float16,float16,0,0.3499306837717692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,128,0,1,fp8,fp8,0,0.31241599718729657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,128,0,1,float16,fp8,0,0.354533314704895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,128,0,1,float16,float16,0,0.359935998916626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,128,0,1,float16,float16,0,0.21916800737380981
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,128,0,1,float16,fp8,0,0.35714133580525714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,128,0,1,fp8,fp8,0,0.31894399722417194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,128,0,1,fp8,fp8,0,0.19907732804616293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,128,0,1,float16,fp8,0,0.21869333585103354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,128,0,1,float16,float16,0,0.20396800835927328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,128,0,1,float16,fp8,0,0.20439465840657553
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,128,0,1,fp8,fp8,0,0.1829813321431478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,128,0,1,float16,float16,0,0.2074399987856547
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,128,0,1,float16,fp8,0,0.20791999499003092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,128,0,1,fp8,fp8,0,0.19024000565210977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,128,0,1,float16,float16,0,0.1360479990641276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,128,0,1,float16,fp8,0,0.136053333679835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,128,0,1,fp8,fp8,0,0.12481600046157837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,128,0,1,float16,float16,0,0.13405866424242655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,128,0,1,float16,fp8,0,0.13313066959381104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,128,0,1,fp8,fp8,0,0.12222400307655334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,128,0,1,float16,float16,0,0.1332266628742218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,128,0,1,float16,fp8,0,0.13406933347384134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,128,0,1,fp8,fp8,0,0.12030399839083354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,128,0,1,float16,float16,0,0.10140800476074219
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,128,0,1,float16,fp8,0,0.10130133231480916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,128,0,1,fp8,fp8,0,0.0953493316968282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,128,0,1,float16,float16,0,0.10142933328946431
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,128,0,1,float16,fp8,0,0.10152000188827515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,128,0,1,fp8,fp8,0,0.09525332848230998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,128,0,1,float16,float16,0,0.10153067111968994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,128,0,1,float16,fp8,0,0.1014400025208791
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,128,0,1,fp8,fp8,0,0.0953493316968282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,128,0,1,float16,float16,0,0.6431093215942383
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,128,0,1,float16,fp8,0,0.6486560106277466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,128,0,1,fp8,fp8,0,0.5707893371582031
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,128,0,1,float16,float16,0,0.6578613519668579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,128,0,1,float16,fp8,0,0.6602026621500651
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,128,0,1,fp8,fp8,0,0.5850079854329427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,128,0,1,float16,float16,0,0.364303986231486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,128,0,1,float16,fp8,0,0.37001601854960126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,128,0,1,fp8,fp8,0,0.3303999900817871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,128,0,1,float16,float16,0,0.34565865993499756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,128,0,1,float16,fp8,0,0.3474773168563843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,128,0,1,fp8,fp8,0,0.3088266650835673
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,128,0,1,float16,float16,0,0.35181868076324463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,128,0,1,fp8,fp8,0,0.31673065821329754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,128,0,1,float16,fp8,0,0.35496532917022705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,128,0,1,float16,float16,0,0.20686399936676025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,128,0,1,float16,fp8,0,0.21130667130152384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,128,0,1,fp8,fp8,0,0.1897546648979187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,128,0,1,float16,float16,0,0.19766400257746378
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,128,0,1,fp8,fp8,0,0.17538134256998697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,128,0,1,float16,fp8,0,0.19453332821528116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,128,0,1,float16,float16,0,0.19823465744654337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,128,0,1,float16,fp8,0,0.19934932390848795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,128,0,1,fp8,fp8,0,0.18269334236780801
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,128,0,1,float16,float16,0,0.12493866682052612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,128,0,1,float16,fp8,0,0.12468266487121582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,128,0,1,fp8,fp8,0,0.11602133512496948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,128,0,1,float16,float16,0,0.1218239963054657
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,128,0,1,float16,fp8,0,0.1263146698474884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,128,0,1,fp8,fp8,0,0.10826667149861653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,128,0,1,float16,float16,0,0.11972266435623169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,128,0,1,float16,fp8,0,0.12321600317955017
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,128,0,1,float16,float16,0,0.07865599791208903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,128,0,1,fp8,fp8,0,0.1109279990196228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,128,0,1,float16,fp8,0,0.08077866832415263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,128,0,1,fp8,fp8,0,0.0745066652695338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,128,0,1,float16,float16,0,0.0775679995616277
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,128,0,1,float16,fp8,0,0.07923733194669087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,128,0,1,fp8,fp8,0,0.07275199890136719
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,128,0,1,float16,float16,0,0.07809600234031677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,128,0,1,float16,fp8,0,0.07899199922879536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,128,0,1,fp8,fp8,0,0.07462933162848155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,128,0,1,float16,float16,0,0.07257066667079926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,128,0,1,float16,fp8,0,0.07449066638946533
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,128,0,1,fp8,fp8,0,0.07073066631952922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,128,0,1,float16,float16,0,0.07234666744867961
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,128,0,1,float16,fp8,0,0.07291199763615926
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,128,0,1,fp8,fp8,0,0.06829866766929626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,128,0,1,float16,float16,0,0.07247999807198842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,128,0,1,float16,fp8,0,0.072202667593956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,128,0,1,fp8,fp8,0,0.0684799998998642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,128,0,1,float16,float16,0,0.430730660756429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,128,0,1,fp8,fp8,0,0.3875253200531006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,128,0,1,float16,fp8,0,0.42956264813741046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,128,0,1,float16,float16,0,0.4402720133463542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,128,0,1,float16,float16,0,0.24875734249750772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,128,0,1,float16,fp8,0,0.44285333156585693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,128,0,1,float16,fp8,0,0.252128005027771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,128,0,1,fp8,fp8,0,0.39468268553415936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,128,0,1,fp8,fp8,0,0.22562666734059653
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,128,0,1,float16,float16,0,0.23164800802866617
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,128,0,1,float16,fp8,0,0.23201066255569458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,128,0,1,fp8,fp8,0,0.20914665857950845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,128,0,1,float16,float16,0,0.23525333404541016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,128,0,1,float16,fp8,0,0.23840532700220743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,128,0,1,fp8,fp8,0,0.21613333622614542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,128,0,1,float16,float16,0,0.14274666706720987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,128,0,1,float16,fp8,0,0.14297599593798319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,128,0,1,fp8,fp8,0,0.1327893336613973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,128,0,1,float16,fp8,0,0.12896533807118735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,128,0,1,float16,float16,0,0.12954666217168173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,128,0,1,fp8,fp8,0,0.11733333269755046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,128,0,1,float16,float16,0,0.13427199920018515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,128,0,1,float16,fp8,0,0.1328000028928121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,128,0,1,fp8,fp8,0,0.12450666228930156
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,128,0,1,float16,float16,0,0.0857919951279958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,128,0,1,float16,fp8,0,0.08734933535257976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,128,0,1,fp8,fp8,0,0.08312533299128215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,128,0,1,float16,float16,0,0.08269866804281871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,128,0,1,float16,fp8,0,0.08473599950472514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,128,0,1,fp8,fp8,0,0.07706666489442189
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,128,0,1,float16,float16,0,0.08339200417200725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,128,0,1,float16,fp8,0,0.08480532964070638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,128,0,1,fp8,fp8,0,0.07743466893831889
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,128,0,1,float16,float16,0,0.062496001521746315
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,128,0,1,float16,fp8,0,0.06248533229033152
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,128,0,1,fp8,fp8,0,0.06011733412742615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,128,0,1,float16,float16,0,0.06214933097362518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,128,0,1,float16,fp8,0,0.06214933097362518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,128,0,1,fp8,fp8,0,0.0595360000928243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,128,0,1,float16,float16,0,0.06217066446940104
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,128,0,1,float16,fp8,0,0.06243200103441874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,128,0,1,fp8,fp8,0,0.060080001751581825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,128,0,1,float16,float16,0,0.05826666454474131
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,128,0,1,float16,fp8,0,0.0603413333495458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,128,0,1,fp8,fp8,0,0.05624000231424967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,128,0,1,float16,float16,0,0.0584853341182073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,128,0,1,float16,fp8,0,0.06026133398214976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,128,0,1,fp8,fp8,0,0.05589333176612854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,128,0,1,float16,float16,0,0.05845333139101664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,128,0,1,float16,fp8,0,0.058431997895240784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,128,0,1,fp8,fp8,0,0.054431999723116554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,128,0,1,float16,float16,0,0.49506668249766034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,128,0,1,fp8,fp8,0,0.44097065925598145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,128,0,1,float16,fp8,0,0.49642666180928546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,128,0,1,float16,float16,0,0.5006719827651978
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,128,0,1,float16,fp8,0,0.5082453489303589
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,128,0,1,fp8,fp8,0,0.45135998725891113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,128,0,1,float16,float16,0,0.2804960012435913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,128,0,1,float16,fp8,0,0.28221867481867474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,128,0,1,fp8,fp8,0,0.2560906608899434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,128,0,1,float16,float16,0,0.25465599695841473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,128,0,1,fp8,fp8,0,0.23308799664179483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,128,0,1,float16,fp8,0,0.25781333446502686
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,128,0,1,float16,float16,0,0.26265066862106323
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,128,0,1,float16,fp8,0,0.26627200841903687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,128,0,1,fp8,fp8,0,0.2395253380139669
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,128,0,1,float16,float16,0,0.15269866585731506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,128,0,1,float16,fp8,0,0.1541973352432251
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,128,0,1,fp8,fp8,0,0.14131733775138855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,128,0,1,float16,float16,0,0.13303466637929282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,128,0,1,fp8,fp8,0,0.1250879963239034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,128,0,1,float16,fp8,0,0.13351999719937643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,128,0,1,float16,float16,0,0.1431893308957418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,128,0,1,fp8,fp8,0,0.13354667027791342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,128,0,1,float16,fp8,0,0.1444960037867228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,128,0,1,float16,float16,0,0.08581866820653279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,128,0,1,float16,fp8,0,0.08712533116340637
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,128,0,1,fp8,fp8,0,0.08360000451405843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,128,0,1,float16,float16,0,0.08082666496435802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,128,0,1,float16,fp8,0,0.08307200173536937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,128,0,1,fp8,fp8,0,0.07505066692829132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,128,0,1,float16,float16,0,0.08086400230725606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,128,0,1,float16,fp8,0,0.08340266346931458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,128,0,1,fp8,fp8,0,0.07669333120187123
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,128,0,1,float16,float16,0,0.05373866856098175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,128,0,1,float16,fp8,0,0.05412266651789347
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,128,0,1,fp8,fp8,0,0.05124266445636749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,128,0,1,float16,fp8,0,0.05256533126036326
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,128,0,1,float16,float16,0,0.0524586687485377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,128,0,1,fp8,fp8,0,0.04987200101216634
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,128,0,1,float16,float16,0,0.05205333232879639
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,128,0,1,float16,fp8,0,0.05227200190226237
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,128,0,1,float16,float16,0,0.04747733473777771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,128,0,1,float16,fp8,0,0.04797866443792979
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,128,0,1,fp8,fp8,0,0.05076266825199127
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,128,0,1,fp8,fp8,0,0.04433066646258036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,128,0,1,float16,float16,0,0.046575998266537987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,128,0,1,float16,fp8,0,0.04598399996757507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,128,0,1,fp8,fp8,0,0.045066664616266884
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,128,0,1,float16,float16,0,0.04599999884764353
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,128,0,1,float16,fp8,0,0.047872001926104225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,128,0,1,fp8,fp8,0,0.04603200157483419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,128,0,1,float16,float16,0,0.045791998505592346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,128,0,1,float16,fp8,0,0.04541866481304169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,128,0,1,fp8,fp8,0,0.043136000633239746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,128,0,1,float16,float16,0,0.04574933151404063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,128,0,1,float16,fp8,0,0.04608533283074697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,128,0,1,fp8,fp8,0,0.04167999823888143
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,128,0,1,float16,float16,0,0.043840001026789345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,128,0,1,float16,fp8,0,0.04398400088151296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,128,0,1,fp8,fp8,0,0.04347200194994608
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,128,0,1,float16,float16,0,0.3253546754519145
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,128,0,1,fp8,fp8,0,0.29149333635965985
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,128,0,1,float16,fp8,0,0.32712533076604206
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,128,0,1,float16,float16,0,0.3320266604423523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,128,0,1,float16,float16,0,0.18976000944773355
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,128,0,1,fp8,fp8,0,0.30242133140563965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,128,0,1,float16,fp8,0,0.3349013328552246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,128,0,1,float16,fp8,0,0.19332265853881836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,128,0,1,fp8,fp8,0,0.1771786610285441
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,128,0,1,float16,float16,0,0.17214399576187134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,128,0,1,float16,fp8,0,0.17229332526524863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,128,0,1,fp8,fp8,0,0.16005866726239523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,128,0,1,float16,float16,0,0.1802133321762085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,128,0,1,float16,float16,0,0.10616000493367513
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,128,0,1,float16,fp8,0,0.1824959913889567
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,128,0,1,fp8,fp8,0,0.1645813286304474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,128,0,1,float16,fp8,0,0.10784000158309937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,128,0,1,fp8,fp8,0,0.10319999853769939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,128,0,1,float16,float16,0,0.09437867005666097
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,128,0,1,float16,fp8,0,0.09594133496284485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,128,0,1,fp8,fp8,0,0.08755200107892354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,128,0,1,float16,float16,0,0.09692800045013428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,128,0,1,float16,fp8,0,0.10000000397364299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,128,0,1,fp8,fp8,0,0.0932426651318868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,128,0,1,float16,float16,0,0.06228266656398773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,128,0,1,float16,fp8,0,0.06412266691525777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,128,0,1,float16,float16,0,0.060122668743133545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,128,0,1,fp8,fp8,0,0.05942399799823761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,128,0,1,float16,fp8,0,0.06010666489601135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,128,0,1,fp8,fp8,0,0.055999999245007835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,128,0,1,float16,float16,0,0.06041066845258077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,128,0,1,float16,fp8,0,0.06225599845250448
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,128,0,1,float16,float16,0,0.04377600053946177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,128,0,1,fp8,fp8,0,0.05801066756248474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,128,0,1,float16,fp8,0,0.04398933549722036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,128,0,1,float16,float16,0,0.04402133325735728
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,128,0,1,fp8,fp8,0,0.04164266586303711
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,128,0,1,float16,fp8,0,0.043866669138272606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,128,0,1,fp8,fp8,0,0.0414986660083135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,128,0,1,float16,float16,0,0.04378666480382284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,128,0,1,float16,fp8,0,0.043680002291997276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,128,0,1,float16,float16,0,0.03975466638803482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,128,0,1,fp8,fp8,0,0.04204800228277842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,128,0,1,float16,fp8,0,0.04185600082079569
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,128,0,1,fp8,fp8,0,0.03798400113979975
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,128,0,1,float16,float16,0,0.039861333866914116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,128,0,1,float16,fp8,0,0.03976533313592275
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,128,0,1,fp8,fp8,0,0.037845333417256675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,128,0,1,float16,float16,0,0.03966933240493139
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,128,0,1,float16,fp8,0,0.04014399896065394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,128,0,1,fp8,fp8,0,0.037845333417256675
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,128,0,1,float16,float16,0,0.039135999977588654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,128,0,1,float16,fp8,0,0.037615999579429626
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,128,0,1,fp8,fp8,0,0.03642133375008901
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,128,0,1,float16,float16,0,0.03794133414824804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,128,0,1,float16,fp8,0,0.03766400118668874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,128,0,1,fp8,fp8,0,0.0356480007370313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,128,0,1,float16,float16,0,0.03741333385308584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,128,0,1,float16,fp8,0,0.0378560001651446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,128,0,1,fp8,fp8,0,0.03589866558710734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,128,0,1,float16,float16,0,0.3349813222885132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,128,0,1,float16,fp8,0,0.32838932673136395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,128,0,1,fp8,fp8,0,0.33078400293986004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,128,0,1,float16,float16,0,0.34093332290649414
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,128,0,1,float16,fp8,0,0.3364693323771159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,128,0,1,fp8,fp8,0,0.3349920113881429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,128,0,1,float16,float16,0,0.19315199057261148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,128,0,1,float16,fp8,0,0.18804800510406494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,128,0,1,fp8,fp8,0,0.18658133347829184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,128,0,1,float16,float16,0,0.1768959959348043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,128,0,1,float16,fp8,0,0.17379732926686606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,128,0,1,fp8,fp8,0,0.17428799470265707
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,128,0,1,float16,float16,0,0.18289599816004434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,128,0,1,float16,fp8,0,0.17778666814168295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,128,0,1,fp8,fp8,0,0.17948800325393677
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,128,0,1,float16,float16,0,0.11010133226712544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,128,0,1,float16,fp8,0,0.10783466696739197
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,128,0,1,fp8,fp8,0,0.10713600118954976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,128,0,1,float16,float16,0,0.09662933150927226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,128,0,1,fp8,fp8,0,0.09284800291061401
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,128,0,1,float16,float16,0,0.10325866937637329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,128,0,1,float16,fp8,0,0.09577600161234538
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,128,0,1,float16,fp8,0,0.09796266754468282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,128,0,1,fp8,fp8,0,0.10095999638239543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,128,0,1,float16,float16,0,0.06132266422112783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,128,0,1,float16,fp8,0,0.06095466514428457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,128,0,1,fp8,fp8,0,0.06043733159701029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,128,0,1,float16,float16,0,0.05816533168156942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,128,0,1,float16,fp8,0,0.057999998331069946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,128,0,1,fp8,fp8,0,0.054341331124305725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,128,0,1,float16,float16,0,0.05806933343410492
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,128,0,1,float16,fp8,0,0.05811200042565664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,128,0,1,fp8,fp8,0,0.056159997979799904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,128,0,1,float16,fp8,0,0.038618666430314384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,128,0,1,float16,float16,0,0.03843733419974645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,128,0,1,fp8,fp8,0,0.03790933390458425
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,128,0,1,float16,float16,0,0.038245332737763725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,128,0,1,float16,fp8,0,0.03756266583998998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,128,0,1,fp8,fp8,0,0.0374293327331543
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,128,0,1,float16,float16,0,0.037434667348861694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,128,0,1,float16,fp8,0,0.03751466671625773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,128,0,1,fp8,fp8,0,0.03779733429352442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,128,0,1,float16,float16,0,0.03365333378314972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,128,0,1,float16,fp8,0,0.03348266581694285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,128,0,1,fp8,fp8,0,0.03150933235883713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,128,0,1,float16,float16,0,0.031632001201311745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,128,0,1,float16,fp8,0,0.0331839993596077
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,128,0,1,float16,float16,0,0.03176533430814743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,128,0,1,fp8,fp8,0,0.031498665610949196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,128,0,1,float16,fp8,0,0.031471999982992806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,128,0,1,fp8,fp8,0,0.031712000568707786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,128,0,1,float16,float16,0,0.029546665648619335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,128,0,1,float16,fp8,0,0.031541332602500916
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,128,0,1,fp8,fp8,0,0.029365333418051403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,128,0,1,float16,float16,0,0.029717333614826202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,128,0,1,float16,fp8,0,0.031397332747777305
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,128,0,1,fp8,fp8,0,0.027535999814669292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,128,0,1,float16,float16,0,0.029839999973773956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,128,0,1,float16,fp8,0,0.03107200066248576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,128,0,1,fp8,fp8,0,0.027615999182065327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,128,0,1,float16,float16,0,0.02905600021282832
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,128,0,1,float16,fp8,0,0.029663999875386555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,128,0,1,fp8,fp8,0,0.02749866743882497
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,128,0,1,float16,float16,0,0.029450667401154835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,128,0,1,float16,fp8,0,0.029663999875386555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,128,0,1,fp8,fp8,0,0.027349332968393963
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,128,0,1,float16,float16,0,0.029461334149042766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,128,0,1,float16,fp8,0,0.029365333418051403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,128,0,1,fp8,fp8,0,0.027461332579453785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,128,0,1,float16,float16,0,0.27980266014734906
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,128,0,1,float16,fp8,0,0.27243733406066895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,128,0,1,fp8,fp8,0,0.29015467564264935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,128,0,1,float16,float16,0,0.28778666257858276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,128,0,1,float16,fp8,0,0.2842986583709717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,128,0,1,fp8,fp8,0,0.29492799441019696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,128,0,1,float16,float16,0,0.16564266880353293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,128,0,1,float16,fp8,0,0.16426666577657065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,128,0,1,fp8,fp8,0,0.16359999775886536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,128,0,1,float16,fp8,0,0.14591466387112936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,128,0,1,float16,float16,0,0.1483840048313141
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,128,0,1,fp8,fp8,0,0.15180266896883646
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,128,0,1,float16,fp8,0,0.15249066551526388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,128,0,1,float16,float16,0,0.15533333023389181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,128,0,1,fp8,fp8,0,0.15496533115704855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,128,0,1,float16,float16,0,0.0962559978167216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,128,0,1,float16,fp8,0,0.09357333183288574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,128,0,1,fp8,fp8,0,0.0940106709798177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,128,0,1,float16,float16,0,0.08137600123882294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,128,0,1,float16,fp8,0,0.07857066889603932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,128,0,1,fp8,fp8,0,0.08070933322111766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,128,0,1,float16,float16,0,0.08333866794904073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,128,0,1,float16,fp8,0,0.08349866668383281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,128,0,1,fp8,fp8,0,0.0869813362757365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,128,0,1,float16,float16,0,0.051967998345692955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,128,0,1,float16,fp8,0,0.05190933247407278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,128,0,1,fp8,fp8,0,0.052042668064435325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,128,0,1,float16,float16,0,0.048138668139775596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,128,0,1,float16,fp8,0,0.049679999550183616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,128,0,1,fp8,fp8,0,0.045968001087506614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,128,0,1,float16,float16,0,0.04804266492525736
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,128,0,1,float16,fp8,0,0.04966400067011515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,128,0,1,fp8,fp8,0,0.04623466730117798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,128,0,1,float16,float16,0,0.033861334125200905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,128,0,1,float16,fp8,0,0.033530667424201965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,128,0,1,fp8,fp8,0,0.03377600014209747
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,128,0,1,float16,float16,0,0.031343999008337654
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,128,0,1,float16,fp8,0,0.03169066707293192
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,128,0,1,fp8,fp8,0,0.03180266668399175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,128,0,1,float16,float16,0,0.032144000132878624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,128,0,1,float16,fp8,0,0.03186133255561193
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,128,0,1,fp8,fp8,0,0.03170666595300039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,128,0,1,float16,float16,0,0.02770666778087616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,128,0,1,float16,fp8,0,0.027674667537212372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,128,0,1,fp8,fp8,0,0.02717333287000656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,128,0,1,float16,float16,0,0.025722667574882507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,128,0,1,float16,fp8,0,0.02754666656255722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,128,0,1,fp8,fp8,0,0.025381334125995636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,128,0,1,float16,float16,0,0.025381334125995636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,128,0,1,float16,fp8,0,0.02757333219051361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,128,0,1,fp8,fp8,0,0.027280000348885853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,128,0,1,float16,float16,0,0.02325333406527837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,128,0,1,float16,fp8,0,0.025381334125995636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,128,0,1,float16,float16,0,0.02515200028816859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,128,0,1,fp8,fp8,0,0.023658665517965954
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,128,0,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,128,0,1,float16,float16,0,0.023498666783173878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,128,0,1,float16,fp8,0,0.025466665625572205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,128,0,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,128,0,1,fp8,fp8,0,0.02405333270629247
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,128,0,1,float16,float16,0,0.02330133318901062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,128,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,128,0,1,fp8,fp8,0,0.02143466720978419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,128,0,1,float16,float16,0,0.02386666586001714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,128,0,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,128,0,1,fp8,fp8,0,0.02163733293612798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,128,0,1,float16,float16,0,0.023141334454218548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,128,0,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,128,0,1,float16,float16,0,0.022997332115968067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,128,0,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,128,0,1,float16,fp8,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,128,0,1,fp8,fp8,0,0.021290667355060577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,128,0,1,float16,float16,0,0.023344000180562336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,128,0,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,128,0,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,128,0,1,float16,float16,0,0.023711999257405598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,128,0,1,float16,fp8,0,0.023589332898457844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,128,0,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,128,0,1,float16,float16,0,0.13173866271972656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,128,0,1,float16,float16,0,0.13774933417638144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,128,0,1,fp8,fp8,0,0.1425386667251587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,128,0,1,float16,fp8,0,0.13033599654833475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,128,0,1,float16,fp8,0,0.1358560025691986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,128,0,1,float16,float16,0,0.08839466174443562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,128,0,1,float16,fp8,0,0.08698667089144389
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,128,0,1,fp8,fp8,0,0.1467573344707489
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,128,0,1,fp8,fp8,0,0.08703999718030293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,128,0,1,float16,float16,0,0.07331199944019318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,128,0,1,float16,fp8,0,0.07256000240643819
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,128,0,1,fp8,fp8,0,0.07598400115966797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,128,0,1,float16,float16,0,0.07517866790294647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,128,0,1,float16,fp8,0,0.07463466624418895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,128,0,1,float16,float16,0,0.04474133253097534
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,128,0,1,fp8,fp8,0,0.08126399914423625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,128,0,1,float16,fp8,0,0.043663998444875084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,128,0,1,fp8,fp8,0,0.045882667104403176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,128,0,1,float16,float16,0,0.04203199843565623
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,128,0,1,float16,fp8,0,0.0422026664018631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,128,0,1,fp8,fp8,0,0.04222933451334635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,128,0,1,float16,float16,0,0.04310933252175649
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,128,0,1,float16,fp8,0,0.04279466470082601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,128,0,1,float16,float16,0,0.03147733211517334
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,128,0,1,float16,fp8,0,0.02998399982849757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,128,0,1,fp8,fp8,0,0.043578664461771645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,128,0,1,fp8,fp8,0,0.031504000226656594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,128,0,1,float16,float16,0,0.029264000554879505
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,128,0,1,float16,fp8,0,0.02957333376010259
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,128,0,1,fp8,fp8,0,0.029535998900731403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,128,0,1,float16,float16,0,0.029301332930723827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,128,0,1,float16,fp8,0,0.029189333319664
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,128,0,1,float16,float16,0,0.024112001061439514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,128,0,1,fp8,fp8,0,0.029477333029111225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,128,0,1,float16,fp8,0,0.02571200082699458
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,128,0,1,fp8,fp8,0,0.02327999969323476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,128,0,1,float16,float16,0,0.023365333676338196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,128,0,1,float16,fp8,0,0.025034666061401367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,128,0,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,128,0,1,float16,float16,0,0.02348800003528595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,128,0,1,float16,fp8,0,0.02521066615978877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,128,0,1,fp8,fp8,0,0.023306667804718018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,128,0,1,float16,float16,0,0.02163200080394745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,128,0,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,128,0,1,fp8,fp8,0,0.021221332252025604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,128,0,1,float16,float16,0,0.021402666966120403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,128,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,128,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,128,0,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,128,0,1,float16,fp8,0,0.021562665700912476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,128,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,128,0,1,float16,float16,0,0.020400000115235645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,128,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,128,0,1,fp8,fp8,0,0.02094399929046631
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,128,0,1,float16,float16,0,0.019167999426523846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,128,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,128,0,1,float16,fp8,0,0.020741333564122517
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,128,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,128,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,128,0,1,float16,float16,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,128,0,1,fp8,fp8,0,0.020917333662509918
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,128,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,128,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,128,0,1,float16,float16,0,0.021269333859284718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,128,0,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,128,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,128,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,128,0,1,float16,fp8,0,0.01969066634774208
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,128,0,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,128,0,1,float16,float16,0,0.021327999730904896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,128,0,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,128,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,128,0,1,float16,float16,0,0.01937599976857503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,128,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,128,0,1,fp8,fp8,0,0.02065066620707512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,128,0,1,float16,float16,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,128,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,128,0,1,fp8,fp8,0,0.018757333358128864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,128,0,1,float16,float16,0,0.07626666625340779
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,128,0,1,float16,fp8,0,0.07572266459465027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,128,0,1,fp8,fp8,0,0.07840533554553986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,128,0,1,float16,float16,0,0.0784693310658137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,128,0,1,float16,fp8,0,0.0764160007238388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,128,0,1,fp8,fp8,0,0.08462933699289958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,128,0,1,float16,fp8,0,0.04632533093293508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,128,0,1,float16,float16,0,0.04808000226815542
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,128,0,1,fp8,fp8,0,0.05037866532802582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,128,0,1,float16,fp8,0,0.045824001232783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,128,0,1,fp8,fp8,0,0.043824002146720886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,128,0,1,float16,float16,0,0.04590400060017904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,128,0,1,float16,float16,0,0.04630400240421295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,128,0,1,float16,fp8,0,0.0440533310174942
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,128,0,1,fp8,fp8,0,0.043840001026789345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,128,0,1,float16,float16,0,0.03146666785081228
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,128,0,1,float16,fp8,0,0.03176533430814743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,128,0,1,float16,float16,0,0.02942399928967158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,128,0,1,fp8,fp8,0,0.0310506671667099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,128,0,1,float16,fp8,0,0.029685333371162415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,128,0,1,fp8,fp8,0,0.02959466725587845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,128,0,1,fp8,fp8,0,0.029205332199732464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,128,0,1,float16,float16,0,0.031845333675543465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,128,0,1,float16,fp8,0,0.02932800104220708
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,128,0,1,float16,float16,0,0.021802666286627453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,128,0,1,float16,fp8,0,0.021429332594076794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,128,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,128,0,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,128,0,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,128,0,1,float16,fp8,0,0.02163733293612798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,128,0,1,float16,float16,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,128,0,1,float16,fp8,0,0.02186666677395503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,128,0,1,float16,float16,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,128,0,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,128,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,128,0,1,float16,float16,0,0.018101333330074947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,128,0,1,float16,fp8,0,0.018138666947682697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,128,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,128,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,128,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,128,0,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,128,0,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,128,0,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,128,0,1,float16,float16,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,128,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,128,0,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,128,0,1,float16,float16,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,128,0,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,128,0,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,128,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,128,0,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,128,0,1,float16,float16,0,0.017445333302021027
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,128,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,128,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,128,0,1,float16,float16,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,128,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,128,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,128,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,128,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,128,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,128,0,1,float16,float16,0,0.052000001072883606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,128,0,1,float16,fp8,0,0.05228800078233083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,128,0,1,fp8,fp8,0,0.050111999114354454
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,128,0,1,float16,float16,0,0.05372266471385956
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,128,0,1,float16,fp8,0,0.05243733525276184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,128,0,1,fp8,fp8,0,0.052560001611709595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,128,0,1,float16,float16,0,0.03583466758330663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,128,0,1,float16,fp8,0,0.03595199932654699
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,128,0,1,fp8,fp8,0,0.03618133316437403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,128,0,1,float16,float16,0,0.03561066587766012
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,128,0,1,float16,fp8,0,0.0355679988861084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,128,0,1,fp8,fp8,0,0.03461866577466329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,128,0,1,float16,float16,0,0.035616000493367515
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,128,0,1,float16,fp8,0,0.035599999129772186
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,128,0,1,fp8,fp8,0,0.03572800010442734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,128,0,1,float16,float16,0,0.02380799998839696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,128,0,1,float16,fp8,0,0.02497600018978119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,128,0,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,128,0,1,float16,float16,0,0.02325333406527837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,128,0,1,float16,fp8,0,0.023285334308942158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,128,0,1,fp8,fp8,0,0.02325333406527837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,128,0,1,float16,float16,0,0.023306667804718018
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,128,0,1,float16,fp8,0,0.023610666394233704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,128,0,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,128,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,128,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,128,0,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,128,0,1,float16,float16,0,0.018986667195955913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,128,0,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,128,0,1,fp8,fp8,0,0.018800000349680584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,128,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,128,0,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,128,0,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,128,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,128,0,1,fp8,fp8,0,0.01785600061217944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,128,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,128,0,1,float16,fp8,0,0.018063999712467194
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,128,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,128,0,1,float16,float16,0,0.017450666675964992
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,128,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,128,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,128,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,128,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,128,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,128,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,128,0,1,float16,float16,0,0.016607999801635742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,128,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,128,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,128,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,128,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,128,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,128,0,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,128,0,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,128,0,1,float16,float16,0,0.015834666788578033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,128,0,1,float16,float16,0,0.04381866753101349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,128,0,1,float16,fp8,0,0.04436799883842468
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,128,0,1,fp8,fp8,0,0.04214933514595032
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,128,0,1,float16,float16,0,0.04391466577847799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,128,0,1,float16,fp8,0,0.04541866481304169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,128,0,1,fp8,fp8,0,0.041840001940727234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,128,0,1,float16,float16,0,0.029333333174387615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,128,0,1,float16,fp8,0,0.02958933264017105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,128,0,1,fp8,fp8,0,0.029498666524887085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,128,0,1,float16,float16,0,0.02926933268706004
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,128,0,1,float16,fp8,0,0.029167999823888142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,128,0,1,fp8,fp8,0,0.02759466568628947
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,128,0,1,float16,float16,0,0.02959466725587845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,128,0,1,float16,fp8,0,0.029690665503342945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,128,0,1,fp8,fp8,0,0.02739733209212621
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,128,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,128,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,128,0,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,128,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,128,0,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,128,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,128,0,1,float16,float16,0,0.021082667013009388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,128,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,128,0,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,128,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,128,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,128,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,128,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,128,0,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,128,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,128,0,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,128,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,128,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,128,0,1,float16,float16,0,0.015466666469971338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,128,0,1,float16,fp8,0,0.017530667285124462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,128,0,1,fp8,fp8,0,0.01617066686352094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,128,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,128,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,128,0,1,fp8,fp8,0,0.015552000453074774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,128,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,128,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,128,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,128,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,128,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,128,0,1,float16,fp8,0,0.015861333658297855
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,128,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,128,0,1,float16,fp8,0,0.01540800059835116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,128,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,128,0,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,128,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,128,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,128,0,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,128,0,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,128,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,128,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,128,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,128,0,1,float16,float16,0,0.03777066618204117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,128,0,1,float16,fp8,0,0.037471999724706016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,128,0,1,fp8,fp8,0,0.03777066618204117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,128,0,1,float16,float16,0,0.03783999880154928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,128,0,1,float16,fp8,0,0.03758399933576584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,128,0,1,fp8,fp8,0,0.03608533243338267
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,128,0,1,float16,float16,0,0.027274665733178455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,128,0,1,float16,fp8,0,0.027269333600997925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,128,0,1,fp8,fp8,0,0.025381334125995636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,128,0,1,float16,float16,0,0.02554133286078771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,128,0,1,float16,fp8,0,0.027290667096773785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,128,0,1,fp8,fp8,0,0.025226667523384094
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,128,0,1,float16,float16,0,0.025642665723959606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,128,0,1,float16,fp8,0,0.027376001079877216
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,128,0,1,fp8,fp8,0,0.025573333104451496
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,128,0,1,float16,float16,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,128,0,1,float16,fp8,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,128,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,128,0,1,float16,float16,0,0.019519999623298645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,128,0,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,128,0,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,128,0,1,float16,float16,0,0.02103466788927714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,128,0,1,float16,fp8,0,0.019551999866962433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,128,0,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,128,0,1,float16,float16,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,128,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,128,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,128,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,128,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,128,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,128,0,1,fp8,fp8,0,0.01632533346613248
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,128,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,128,0,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,128,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,128,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,128,0,1,float16,fp8,0,0.01629866659641266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,128,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,128,0,1,fp8,fp8,0,0.015909332782030106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,128,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,128,0,1,fp8,fp8,0,0.015978666643301647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,128,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,128,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,128,0,1,float16,fp8,0,0.016106666376193363
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,128,0,1,float16,fp8,0,0.016549333930015564
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,128,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,128,0,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,128,0,1,float16,float16,0,0.018506667266289394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,128,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,128,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,128,0,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,128,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,128,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,0,0.033770665526390076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,0,0.03365333378314972
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,128,0,1,fp8,fp8,0,0.03358400116364161
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,0,0.033973333736260734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,0,0.025279998779296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,128,0,1,fp8,fp8,0,0.0329120010137558
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,0,0.034485332667827606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,0,0.02533866713444392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,128,0,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,0,0.025978667040665943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,128,0,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,0,0.025050667424996693
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,128,0,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,0,0.023168000082174938
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,0,0.023397333920001984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,128,0,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,0,0.023232000569502514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,128,0,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,0,0.02179199953873952
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,0,0.021525333325068157
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,128,0,1,fp8,fp8,0,0.019648000597953796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,128,0,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,128,0,1,fp8,fp8,0,0.017786666750907898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,0,0.01889066646496455
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,0,0.01739199956258138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,128,0,1,fp8,fp8,0,0.017717332889636356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,128,0,1,fp8,fp8,0,0.01748266691962878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,0,0.017535999417304993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,0,0.017562666287024815
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,128,0,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,128,0,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,128,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,128,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,128,0,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,128,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,0,0.015461333096027374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,128,0,1,fp8,fp8,0,0.01553600033124288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,0,0.01479999969402949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,128,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,128,0,1,fp8,fp8,0,0.016602666427691776
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,128,0,1,fp8,fp8,0,0.01565333331624667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,128,0,1,fp8,fp8,0,0.015989333391189575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,128,0,1,float16,float16,0,0.9402186870574951
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,128,0,1,fp8,fp8,0,0.8151146570841471
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,128,0,1,float16,fp8,0,0.943008025487264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,128,0,1,float16,fp8,0,0.5566293398539225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,128,0,1,float16,float16,0,0.554688016573588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,128,0,1,fp8,fp8,0,0.4888373215993245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,128,0,1,float16,float16,0,0.5437920093536377
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,128,0,1,fp8,fp8,0,0.4795893430709839
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,128,0,1,float16,fp8,0,0.5445173184076945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,128,0,1,fp8,fp8,0,0.3102506597836812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,128,0,1,float16,fp8,0,0.3495413462320964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,128,0,1,float16,float16,0,0.34638933340708417
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,128,0,1,float16,float16,0,0.34672534465789795
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,128,0,1,float16,float16,0,0.2259999910990397
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,128,0,1,fp8,fp8,0,0.3054773410161336
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,128,0,1,float16,fp8,0,0.34460266431172687
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,128,0,1,float16,fp8,0,0.2296746571858724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,128,0,1,fp8,fp8,0,0.2059626579284668
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,128,0,1,float16,float16,0,0.22467732429504395
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,128,0,1,float16,fp8,0,0.22658665974934897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,128,0,1,fp8,fp8,0,0.20613867044448853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,128,0,1,fp8,fp8,0,0.5174346764882406
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,128,0,1,float16,float16,0,0.5905226469039917
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,128,0,1,float16,fp8,0,0.5936799844106039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,128,0,1,float16,float16,0,0.36001066366831463
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,128,0,1,fp8,fp8,0,0.3179519971211751
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,128,0,1,float16,fp8,0,0.35753067334493
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,128,0,1,float16,float16,0,0.3494933446248372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,128,0,1,float16,fp8,0,0.3511039813359578
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,128,0,1,float16,float16,0,0.22297066450119019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,128,0,1,float16,fp8,0,0.2277066707611084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,128,0,1,fp8,fp8,0,0.31013866265614826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,128,0,1,fp8,fp8,0,0.20454933245976767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,128,0,1,float16,float16,0,0.22136000792185465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,128,0,1,float16,fp8,0,0.22492265701293945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,128,0,1,fp8,fp8,0,0.19891732931137085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,128,0,1,float16,float16,0,0.17107200622558594
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,128,0,1,float16,fp8,0,0.17185600598653158
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,128,0,1,fp8,fp8,0,0.15895467003186545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,128,0,1,float16,float16,0,0.16895467042922974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,128,0,1,float16,fp8,0,0.16985066731770834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,128,0,1,fp8,fp8,0,0.15845866998036703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,128,0,1,float16,float16,0,0.445093313852946
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,128,0,1,float16,fp8,0,0.44887999693552655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,128,0,1,fp8,fp8,0,0.39233068625132245
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,128,0,1,float16,float16,0,0.2743893265724182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,128,0,1,float16,fp8,0,0.278656005859375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,128,0,1,fp8,fp8,0,0.24870934089024863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,128,0,1,float16,float16,0,0.2661386728286743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,128,0,1,float16,fp8,0,0.27134933074315387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,128,0,1,fp8,fp8,0,0.23917333285013834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,128,0,1,float16,float16,0,0.166810671488444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,128,0,1,float16,fp8,0,0.16893333196640015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,128,0,1,fp8,fp8,0,0.15011200308799744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,128,0,1,float16,float16,0,0.16573333740234375
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,128,0,1,float16,fp8,0,0.16843199729919434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,128,0,1,fp8,fp8,0,0.14828800161679587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,128,0,1,float16,float16,0,0.14408533771832785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,128,0,1,float16,fp8,0,0.14441600441932678
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,128,0,1,fp8,fp8,0,0.13578133781750998
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,128,0,1,float16,float16,0,0.14453333616256714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,128,0,1,float16,fp8,0,0.14516799648602804
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,128,0,1,fp8,fp8,0,0.13409599661827087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,128,0,1,float16,float16,0,0.5587466557820638
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,128,0,1,float16,fp8,0,0.5584799846013387
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,128,0,1,fp8,fp8,0,0.4848533471425374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,128,0,1,float16,float16,0,0.3309333324432373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,128,0,1,fp8,fp8,0,0.2902346650759379
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,128,0,1,float16,fp8,0,0.3289066751797994
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,128,0,1,float16,float16,0,0.3168586691220601
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,128,0,1,float16,fp8,0,0.31485867500305176
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,128,0,1,fp8,fp8,0,0.28468799591064453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,128,0,1,float16,float16,0,0.1968053380648295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,128,0,1,float16,fp8,0,0.19958933194478354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,128,0,1,fp8,fp8,0,0.18370133638381958
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,128,0,1,float16,float16,0,0.19399466117223105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,128,0,1,fp8,fp8,0,0.17542932430903116
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,128,0,1,float16,fp8,0,0.19580799341201782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,128,0,1,float16,float16,0,0.12618133425712585
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,128,0,1,float16,fp8,0,0.1279093325138092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,128,0,1,fp8,fp8,0,0.117658664782842
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,128,0,1,float16,float16,0,0.12575999895731607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,128,0,1,float16,fp8,0,0.1264533301194509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,128,0,1,fp8,fp8,0,0.11633599797884624
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,128,0,1,float16,float16,0,0.11937600374221802
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,128,0,1,float16,fp8,0,0.11973333358764648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,128,0,1,fp8,fp8,0,0.11158399780591328
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,128,0,1,float16,fp8,0,0.11954666177431743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,128,0,1,float16,float16,0,0.11822932958602905
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,128,0,1,fp8,fp8,0,0.11142399907112122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,128,0,1,float16,float16,0,0.35893332958221436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,128,0,1,float16,float16,0,0.21766932805379233
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,128,0,1,float16,fp8,0,0.3603359858194987
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,128,0,1,fp8,fp8,0,0.3242560029029846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,128,0,1,float16,fp8,0,0.2148053248723348
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,128,0,1,fp8,fp8,0,0.20201599597930908
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,128,0,1,float16,float16,0,0.203984002272288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,128,0,1,float16,fp8,0,0.20800000429153442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,128,0,1,fp8,fp8,0,0.18629332383473715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,128,0,1,float16,float16,0,0.13101866841316223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,128,0,1,float16,fp8,0,0.13409599661827087
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,128,0,1,fp8,fp8,0,0.12385599811871846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,128,0,1,float16,float16,0,0.13065066933631897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,128,0,1,float16,fp8,0,0.13195199767748514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,128,0,1,fp8,fp8,0,0.11760532855987549
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,128,0,1,float16,float16,0,0.09816533327102661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,128,0,1,float16,fp8,0,0.0993226667245229
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,128,0,1,fp8,fp8,0,0.09309867024421692
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,128,0,1,float16,float16,0,0.09730133414268494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,128,0,1,float16,fp8,0,0.09915199875831604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,128,0,1,fp8,fp8,0,0.09083732962608337
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,128,0,1,float16,float16,0,0.09291199843088786
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,128,0,1,float16,fp8,0,0.0940106709798177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,128,0,1,float16,float16,0,0.09284266829490662
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,128,0,1,float16,fp8,0,0.0937493344148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,128,0,1,fp8,fp8,0,0.08799999952316284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,128,0,1,fp8,fp8,0,0.08678399523099263
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,128,0,1,float16,float16,0,0.2145706613858541
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,128,0,1,float16,float16,0,0.36746132373809814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,128,0,1,fp8,fp8,0,0.33313600222269696
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,128,0,1,float16,fp8,0,0.37083200613657635
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,128,0,1,float16,fp8,0,0.21663999557495117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,128,0,1,fp8,fp8,0,0.19417067368825278
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,128,0,1,float16,fp8,0,0.20407466093699136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,128,0,1,float16,float16,0,0.2039146622021993
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,128,0,1,fp8,fp8,0,0.18659732739130655
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,128,0,1,float16,float16,0,0.12139733632405598
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,128,0,1,float16,fp8,0,0.1262079974015554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,128,0,1,fp8,fp8,0,0.11594133575757344
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,128,0,1,float16,float16,0,0.12036266922950745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,128,0,1,float16,fp8,0,0.12288533647855122
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,128,0,1,float16,float16,0,0.07718400160471599
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,128,0,1,fp8,fp8,0,0.10960533221562703
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,128,0,1,float16,fp8,0,0.07760533193747203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,128,0,1,fp8,fp8,0,0.07322666545708974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,128,0,1,float16,float16,0,0.07685333490371704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,128,0,1,float16,fp8,0,0.07673599819342296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,128,0,1,fp8,fp8,0,0.07025599976380666
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,128,0,1,float16,float16,0,0.07027733325958252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,128,0,1,float16,fp8,0,0.07055466870466869
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,128,0,1,fp8,fp8,0,0.06651733318964641
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,128,0,1,float16,float16,0,0.07041599849859874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,128,0,1,fp8,fp8,0,0.0658079981803894
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,128,0,1,float16,fp8,0,0.07030933101971944
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,128,0,1,float16,float16,0,0.0682666649421056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,128,0,1,float16,fp8,0,0.06843199829260509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,128,0,1,fp8,fp8,0,0.06412266691525777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,128,0,1,float16,float16,0,0.06816000243028005
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,128,0,1,float16,fp8,0,0.068271999557813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,128,0,1,fp8,fp8,0,0.06243200103441874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,128,0,1,float16,float16,0,0.24492265780766806
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,128,0,1,float16,fp8,0,0.2453599969546
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,128,0,1,fp8,fp8,0,0.21984533468882242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,128,0,1,float16,float16,0,0.14451733231544495
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,128,0,1,float16,fp8,0,0.1470080018043518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,128,0,1,fp8,fp8,0,0.13471466302871704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,128,0,1,float16,float16,0,0.1342026690642039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,128,0,1,float16,fp8,0,0.13541332880655924
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,128,0,1,fp8,fp8,0,0.12495999534924825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,128,0,1,float16,float16,0,0.08346666892369588
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,128,0,1,float16,fp8,0,0.08588799834251404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,128,0,1,fp8,fp8,0,0.0793333351612091
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,128,0,1,float16,float16,0,0.08260266482830048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,128,0,1,float16,fp8,0,0.08356799681981404
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,128,0,1,fp8,fp8,0,0.07574399809042613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,128,0,1,float16,float16,0,0.06239999830722809
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,128,0,1,float16,fp8,0,0.062352001667022705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,128,0,1,fp8,fp8,0,0.05820799867312113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,128,0,1,float16,float16,0,0.06033066908518473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,128,0,1,float16,fp8,0,0.061039999127388
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,128,0,1,fp8,fp8,0,0.057487999399503074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,128,0,1,float16,float16,0,0.05596266686916351
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,128,0,1,float16,fp8,0,0.058143998185793556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,128,0,1,fp8,fp8,0,0.05383466680844625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,128,0,1,float16,float16,0,0.05621333420276642
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,128,0,1,float16,fp8,0,0.058229332168896995
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,128,0,1,fp8,fp8,0,0.05446400245030721
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,128,0,1,float16,float16,0,0.054325332244237266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,128,0,1,float16,fp8,0,0.05590933561325073
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,128,0,1,fp8,fp8,0,0.05243733525276184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,128,0,1,float16,float16,0,0.056261335810025535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,128,0,1,float16,fp8,0,0.05563200016816457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,128,0,1,fp8,fp8,0,0.05182399849096934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,128,0,1,float16,float16,0,0.2670666575431824
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,128,0,1,fp8,fp8,0,0.24220800399780273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,128,0,1,float16,fp8,0,0.26716800530751544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,128,0,1,float16,float16,0,0.1536906659603119
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,128,0,1,float16,fp8,0,0.1553813318411509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,128,0,1,fp8,fp8,0,0.14362667004267374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,128,0,1,float16,float16,0,0.14326933026313782
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,128,0,1,float16,fp8,0,0.14498133460680643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,128,0,1,fp8,fp8,0,0.13383466998736063
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,128,0,1,float16,float16,0,0.0844640036424001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,128,0,1,float16,fp8,0,0.08738666772842407
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,128,0,1,fp8,fp8,0,0.08346133430798848
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,128,0,1,float16,float16,0,0.08262933293978374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,128,0,1,fp8,fp8,0,0.0764213353395462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,128,0,1,float16,fp8,0,0.08303999900817871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,128,0,1,float16,float16,0,0.053616002202034
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,128,0,1,float16,fp8,0,0.053930665055910744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,128,0,1,fp8,fp8,0,0.05189866820971171
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,128,0,1,float16,float16,0,0.052485331892967224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,128,0,1,float16,fp8,0,0.0531626691420873
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,128,0,1,fp8,fp8,0,0.05013866722583771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,128,0,1,float16,float16,0,0.04638933142026266
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,128,0,1,float16,fp8,0,0.047824000318845115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,128,0,1,fp8,fp8,0,0.044256001710891724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,128,0,1,float16,float16,0,0.04584000011285146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,128,0,1,float16,fp8,0,0.047770669062932335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,128,0,1,float16,float16,0,0.04390933116277059
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,128,0,1,fp8,fp8,0,0.04497066636880239
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,128,0,1,float16,fp8,0,0.04377600053946177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,128,0,1,fp8,fp8,0,0.04161600023508072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,128,0,1,float16,float16,0,0.043978666265805565
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,128,0,1,fp8,fp8,0,0.03956799954175949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,128,0,1,float16,fp8,0,0.04377600053946177
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,128,0,1,float16,float16,0,0.04370133578777313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,128,0,1,float16,fp8,0,0.04398933549722036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,128,0,1,fp8,fp8,0,0.039818666875362396
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,128,0,1,float16,float16,0,0.04348266621430715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,128,0,1,float16,fp8,0,0.043663998444875084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,128,0,1,fp8,fp8,0,0.04113066693147024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,128,0,1,float16,float16,0,0.10749866565068562
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,128,0,1,float16,float16,0,0.17918399969736734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,128,0,1,float16,fp8,0,0.18014933665593466
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,128,0,1,fp8,fp8,0,0.16543466846148172
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,128,0,1,float16,fp8,0,0.1071626643339793
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,128,0,1,float16,float16,0,0.0957493285338084
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,128,0,1,fp8,fp8,0,0.10312533378601074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,128,0,1,float16,fp8,0,0.09789866209030151
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,128,0,1,float16,float16,0,0.062405332922935486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,128,0,1,fp8,fp8,0,0.06018133461475372
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,128,0,1,float16,fp8,0,0.06433600187301636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,128,0,1,fp8,fp8,0,0.09278933207194011
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,128,0,1,float16,float16,0,0.06027733286221822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,128,0,1,float16,fp8,0,0.06201066573460897
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,128,0,1,float16,float16,0,0.04423466821511587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,128,0,1,fp8,fp8,0,0.05645333230495453
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,128,0,1,float16,fp8,0,0.04369066655635834
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,128,0,1,float16,float16,0,0.04354666670163473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,128,0,1,fp8,fp8,0,0.04355733096599579
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,128,0,1,float16,fp8,0,0.04399466514587402
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,128,0,1,fp8,fp8,0,0.04187199970086416
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,128,0,1,float16,fp8,0,0.039488000174363456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,128,0,1,float16,float16,0,0.038319999972979225
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,128,0,1,fp8,fp8,0,0.03766400118668874
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,128,0,1,float16,float16,0,0.03976000100374222
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,128,0,1,float16,fp8,0,0.03940266619126002
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,128,0,1,fp8,fp8,0,0.03579733272393545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,128,0,1,float16,float16,0,0.03749333322048187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,128,0,1,float16,fp8,0,0.03737066686153412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,128,0,1,fp8,fp8,0,0.03558400024970373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,128,0,1,float16,float16,0,0.037418665985266365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,128,0,1,float16,fp8,0,0.03568000098069509
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,128,0,1,fp8,fp8,0,0.03549866626660029
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,128,0,1,float16,float16,0,0.03612799942493439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,128,0,1,float16,fp8,0,0.035962666074434914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,128,0,1,fp8,fp8,0,0.03380800038576126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,128,0,1,float16,float16,0,0.03586666782697042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,128,0,1,float16,fp8,0,0.035989334185918175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,128,0,1,fp8,fp8,0,0.033701332906881966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,128,0,1,float16,float16,0,0.1791200041770935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,128,0,1,float16,fp8,0,0.17868266503016153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,128,0,1,fp8,fp8,0,0.1771626671155294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,128,0,1,float16,float16,0,0.10858133435249329
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,128,0,1,float16,fp8,0,0.10629866520563762
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,128,0,1,fp8,fp8,0,0.10541333754857381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,128,0,1,float16,float16,0,0.09941866993904114
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,128,0,1,float16,fp8,0,0.0962666670481364
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,128,0,1,fp8,fp8,0,0.09847467144330342
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,128,0,1,float16,float16,0,0.06101333101590475
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,128,0,1,float16,fp8,0,0.059658666451772056
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,128,0,1,fp8,fp8,0,0.06030400097370148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,128,0,1,float16,float16,0,0.05849599838256836
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,128,0,1,float16,fp8,0,0.05835733314355215
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,128,0,1,fp8,fp8,0,0.05606933434804281
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,128,0,1,float16,float16,0,0.03787733366092046
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,128,0,1,float16,fp8,0,0.037471999724706016
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,128,0,1,fp8,fp8,0,0.03751466671625773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,128,0,1,float16,float16,0,0.03702933341264725
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,128,0,1,float16,fp8,0,0.0378560001651446
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,128,0,1,fp8,fp8,0,0.03587199995915095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,128,0,1,float16,float16,0,0.033600000043710075
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,128,0,1,float16,fp8,0,0.034101332227389015
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,128,0,1,fp8,fp8,0,0.031386665999889374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,128,0,1,float16,float16,0,0.0332640012105306
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,128,0,1,float16,fp8,0,0.03197333216667175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,128,0,1,fp8,fp8,0,0.03162666658560435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,128,0,1,float16,float16,0,0.03148266673088074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,128,0,1,float16,fp8,0,0.029658667743206024
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,128,0,1,fp8,fp8,0,0.02938133229811986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,128,0,1,float16,float16,0,0.03142933299144109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,128,0,1,float16,fp8,0,0.031317333380381264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,128,0,1,fp8,fp8,0,0.029738667110602062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,128,0,1,float16,float16,0,0.029322666426499683
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,128,0,1,float16,fp8,0,0.029685333371162415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,128,0,1,fp8,fp8,0,0.027402666707833607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,128,0,1,float16,float16,0,0.029365333418051403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,128,0,1,float16,fp8,0,0.02945599953333537
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,128,0,1,fp8,fp8,0,0.027471999327341717
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,128,0,1,float16,float16,0,0.02850666642189026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,128,0,1,float16,fp8,0,0.029466666281223297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,128,0,1,fp8,fp8,0,0.0271573339899381
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,128,0,1,float16,float16,0,0.028927999238173168
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,128,0,1,float16,fp8,0,0.029696000119050343
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,128,0,1,fp8,fp8,0,0.027461332579453785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,128,0,1,float16,float16,0,0.15272000432014465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,128,0,1,float16,fp8,0,0.14721600214640299
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,128,0,1,fp8,fp8,0,0.1564906636873881
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,128,0,1,float16,float16,0,0.09308800101280212
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,128,0,1,float16,fp8,0,0.08950933814048767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,128,0,1,fp8,fp8,0,0.09302933017412822
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,128,0,1,float16,float16,0,0.08236800134181976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,128,0,1,float16,fp8,0,0.08030933141708374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,128,0,1,fp8,fp8,0,0.08888000249862671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,128,0,1,float16,float16,0,0.05193066596984863
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,128,0,1,float16,fp8,0,0.05007466673851013
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,128,0,1,fp8,fp8,0,0.052298665046691895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,128,0,1,float16,fp8,0,0.04825599988301595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,128,0,1,fp8,fp8,0,0.046629334489504494
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,128,0,1,float16,float16,0,0.04849066833655039
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,128,0,1,float16,float16,0,0.03141866624355316
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,128,0,1,float16,fp8,0,0.03180266668399175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,128,0,1,fp8,fp8,0,0.032431999842325844
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,128,0,1,float16,float16,0,0.0312266672650973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,128,0,1,float16,fp8,0,0.03194133440653483
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,128,0,1,fp8,fp8,0,0.03182400017976761
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,128,0,1,float16,float16,0,0.027765333652496338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,128,0,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,128,0,1,fp8,fp8,0,0.027386667827765148
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,128,0,1,float16,float16,0,0.027589333554108936
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,128,0,1,float16,fp8,0,0.027141332626342773
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,128,0,1,fp8,fp8,0,0.027317332724730175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,128,0,1,float16,fp8,0,0.024901332954565685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,128,0,1,float16,float16,0,0.025770666698614757
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,128,0,1,fp8,fp8,0,0.023242667317390442
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,128,0,1,float16,float16,0,0.025077333052953083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,128,0,1,float16,fp8,0,0.02364266663789749
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,128,0,1,float16,float16,0,0.023503998915354412
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,128,0,1,float16,fp8,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,128,0,1,fp8,fp8,0,0.023418667415777843
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,128,0,1,fp8,fp8,0,0.02164799968401591
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,128,0,1,float16,float16,0,0.023082666099071503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,128,0,1,float16,fp8,0,0.023936000963052113
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,128,0,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,128,0,1,float16,float16,0,0.023157333334287006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,128,0,1,float16,fp8,0,0.02366400013367335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,128,0,1,fp8,fp8,0,0.02252800017595291
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,128,0,1,float16,float16,0,0.023189333577950794
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,128,0,1,float16,fp8,0,0.024282666544119518
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,128,0,1,fp8,fp8,0,0.022341333329677582
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,128,0,1,float16,float16,0,0.02327466756105423
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,128,0,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,128,0,1,float16,fp8,0,0.023610666394233704
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,128,0,1,float16,float16,0,0.023152001202106476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,128,0,1,float16,fp8,0,0.023130667706330616
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,128,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,128,0,1,float16,float16,0,0.07663466533025105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,128,0,1,fp8,fp8,0,0.08082133531570435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,128,0,1,float16,fp8,0,0.0761599987745285
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,128,0,1,float16,float16,0,0.04437866806983948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,128,0,1,float16,fp8,0,0.043807998299598694
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,128,0,1,fp8,fp8,0,0.04794666667779287
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,128,0,1,float16,float16,0,0.04190400242805481
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,128,0,1,float16,fp8,0,0.04197866717974345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,128,0,1,fp8,fp8,0,0.04188799858093262
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,128,0,1,float16,float16,0,0.029285334050655365
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,128,0,1,float16,fp8,0,0.027669332921504974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,128,0,1,fp8,fp8,0,0.030063999195893604
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,128,0,1,float16,float16,0,0.027653334041436512
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,128,0,1,float16,fp8,0,0.0296426663796107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,128,0,1,fp8,fp8,0,0.027461332579453785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,128,0,1,float16,float16,0,0.023354666928450268
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,128,0,1,float16,fp8,0,0.025216000775496166
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,128,0,1,float16,float16,0,0.025221332907676697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,128,0,1,fp8,fp8,0,0.02346666653951009
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,128,0,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,128,0,1,float16,float16,0,0.023034666975339253
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,128,0,1,fp8,fp8,0,0.023455999791622162
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,128,0,1,float16,fp8,0,0.022848000129063923
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,128,0,1,fp8,fp8,0,0.021722666919231415
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,128,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,128,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,128,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,128,0,1,float16,float16,0,0.020106667031844456
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,128,0,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,128,0,1,float16,float16,0,0.02146666745344798
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,128,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,128,0,1,float16,fp8,0,0.02090666691462199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,128,0,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,128,0,1,float16,float16,0,0.019424000134070713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,128,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,128,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,128,0,1,float16,float16,0,0.019679999599854153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,128,0,1,float16,fp8,0,0.019546666493018467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,128,0,1,fp8,fp8,0,0.01940800001223882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,128,0,1,float16,float16,0,0.02088533341884613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,128,0,1,float16,fp8,0,0.019493332753578823
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,128,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,128,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,128,0,1,float16,fp8,0,0.019498666127522785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,128,0,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,128,0,1,float16,float16,0,0.021018666525681812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,128,0,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,128,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,128,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,128,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,128,0,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,128,0,1,float16,float16,0,0.045509333411852516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,128,0,1,float16,fp8,0,0.045696000258127846
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,128,0,1,fp8,fp8,0,0.043935999274253845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,128,0,1,float16,float16,0,0.029872000217437744
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,128,0,1,float16,fp8,0,0.02921066681543986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,128,0,1,fp8,fp8,0,0.03153600047032038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,128,0,1,float16,float16,0,0.02951466788848241
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,128,0,1,float16,fp8,0,0.029765332738558452
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,128,0,1,fp8,fp8,0,0.02937600016593933
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,128,0,1,float16,float16,0,0.02120000123977661
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,128,0,1,float16,fp8,0,0.022202665607134502
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,128,0,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,128,0,1,float16,float16,0,0.02143466720978419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,128,0,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,128,0,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,128,0,1,float16,float16,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,128,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,128,0,1,float16,float16,0,0.017759999881188076
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,128,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,128,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,128,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,128,0,1,float16,float16,0,0.01899733394384384
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,128,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,128,0,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,128,0,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,128,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,128,0,1,fp8,fp8,0,0.01754133279124896
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,128,0,1,fp8,fp8,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,128,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,128,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,128,0,1,fp8,fp8,0,0.016783999900023144
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,128,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,128,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,128,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,128,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,128,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,128,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,128,0,1,float16,fp8,0,0.03503466645876566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,128,0,1,float16,float16,0,0.035936000446478523
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,128,0,1,fp8,fp8,0,0.035802667339642845
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,128,0,1,float16,float16,0,0.02351466566324234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,128,0,1,float16,fp8,0,0.02533866713444392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,128,0,1,fp8,fp8,0,0.025792000194390614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,128,0,1,float16,float16,0,0.023423999547958374
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,128,0,1,float16,fp8,0,0.02367999901374181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,128,0,1,fp8,fp8,0,0.025455998877684276
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,128,0,1,float16,float16,0,0.018511999398469925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,128,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,128,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,128,0,1,float16,float16,0,0.01904533306757609
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,128,0,1,float16,fp8,0,0.017632000148296356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,128,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,128,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,128,0,1,fp8,fp8,0,0.01747200017174085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,128,0,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,128,0,1,fp8,fp8,0,0.017616000026464462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,128,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,128,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,128,0,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,128,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,128,0,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,128,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,128,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,128,0,1,float16,fp8,0,0.017621333400408428
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,128,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,128,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,128,0,1,float16,float16,0,0.016810666769742966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,128,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,128,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,128,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,128,0,1,float16,float16,0,0.02959999938805898
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,128,0,1,float16,fp8,0,0.029418667157491047
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,128,0,1,fp8,fp8,0,0.02921066681543986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,128,0,1,float16,float16,0,0.021488000949223835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,128,0,1,float16,fp8,0,0.021562665700912476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,128,0,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,128,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,128,0,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,128,0,1,fp8,fp8,0,0.021386665602525074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,128,0,1,float16,float16,0,0.017488000293572743
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,128,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,128,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,128,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,128,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,128,0,1,float16,float16,0,0.01613333324591319
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,128,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,128,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,128,0,1,float16,float16,0,0.01756799966096878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,128,0,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,128,0,1,fp8,fp8,0,0.01747200017174085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,128,0,1,float16,float16,0,0.015605332950750986
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,128,0,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,128,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,128,0,1,float16,fp8,0,0.016682667036851246
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,128,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,128,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,128,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,128,0,1,float16,fp8,0,0.01562133307258288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,128,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,128,0,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,128,0,1,float16,fp8,0,0.01586666703224182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,128,0,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,128,0,1,float16,float16,0,0.015909332782030106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,128,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,128,0,1,float16,float16,0,0.015573333948850632
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,128,0,1,float16,float16,0,0.027488000690937042
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,128,0,1,float16,fp8,0,0.026533332963784535
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,128,0,1,fp8,fp8,0,0.027189334233601887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,128,0,1,float16,float16,0,0.02096533278624217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,128,0,1,float16,fp8,0,0.019445333629846573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,128,0,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,128,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,128,0,1,float16,fp8,0,0.01937599976857503
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,128,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,128,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,128,0,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,128,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,128,0,1,float16,float16,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,128,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,128,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,128,0,1,float16,float16,0,0.015429332852363586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,128,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,128,0,1,fp8,fp8,0,0.01545599972208341
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,128,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,128,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,128,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,128,0,1,float16,fp8,0,0.01586666703224182
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,128,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,128,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,128,0,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,128,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,128,0,1,float16,fp8,0,0.01553600033124288
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,128,0,1,float16,float16,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,128,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,128,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,128,0,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,128,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,128,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,0,0.025279998779296875
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,0,0.025674665967623394
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,128,0,1,fp8,fp8,0,0.025386666258176167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,0,0.020762667059898376
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,0,0.021226666867733
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,128,0,1,fp8,fp8,0,0.019546666493018467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,0,0.021349333226680756
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,128,0,1,fp8,fp8,0,0.019733333339293797
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,0,0.017551999539136887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,128,0,1,fp8,fp8,0,0.01830400029818217
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,0,0.018751999984184902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,128,0,1,fp8,fp8,0,0.01756799966096878
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,0,0.018751999984184902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,128,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,128,0,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,0,0.015882667154073715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,128,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,128,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,128,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,128,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,128,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,128,0,1,float16,float16,0,0.3433493375778198
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,128,0,1,fp8,fp8,0,0.3072213331858317
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,128,0,1,float16,fp8,0,0.3433440128962199
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,128,0,1,float16,float16,0,0.22589866320292154
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,128,0,1,fp8,fp8,0,0.19970667362213135
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,128,0,1,float16,fp8,0,0.2233333388964335
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,128,0,1,float16,float16,0,0.20773865779240927
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,128,0,1,fp8,fp8,0,0.19359999895095825
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,128,0,1,float16,fp8,0,0.21047999461491904
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,128,0,1,float16,float16,0,0.22664533058802286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,128,0,1,fp8,fp8,0,0.19961067040761313
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,128,0,1,float16,fp8,0,0.22177066405614218
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,128,0,1,float16,float16,0,0.1671253244082133
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,128,0,1,fp8,fp8,0,0.1548799971739451
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,128,0,1,float16,fp8,0,0.1678239901860555
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,128,0,1,float16,float16,0,0.16062399744987488
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,128,0,1,float16,fp8,0,0.16225600242614746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,128,0,1,fp8,fp8,0,0.14998933672904968
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,128,0,1,float16,float16,0,0.16431466738382974
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,128,0,1,float16,fp8,0,0.16605866948763529
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,128,0,1,fp8,fp8,0,0.14849066734313965
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,128,0,1,float16,float16,0,0.14016000429789224
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,128,0,1,float16,fp8,0,0.14231466253598532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,128,0,1,fp8,fp8,0,0.13220799962679544
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,128,0,1,float16,float16,0,0.136245330174764
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,128,0,1,fp8,fp8,0,0.12827199697494507
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,128,0,1,float16,fp8,0,0.1381066640218099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,128,0,1,float16,float16,0,0.19989333550135294
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,128,0,1,fp8,fp8,0,0.17986132701237997
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,128,0,1,float16,fp8,0,0.20003734032313028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,128,0,1,float16,float16,0,0.12756266196568808
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,128,0,1,fp8,fp8,0,0.11489066481590271
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,128,0,1,float16,fp8,0,0.12637333075205484
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,128,0,1,float16,float16,0,0.11636267105738322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,128,0,1,float16,fp8,0,0.11606933673222859
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,128,0,1,fp8,fp8,0,0.10769599676132202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,128,0,1,float16,float16,0,0.1135093371073405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,128,0,1,float16,fp8,0,0.11361599961916606
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,128,0,1,fp8,fp8,0,0.10525332887967427
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,128,0,1,float16,float16,0,0.13320533434549967
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,128,0,1,float16,fp8,0,0.13267733653386435
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,128,0,1,float16,float16,0,0.09556800127029419
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,128,0,1,fp8,fp8,0,0.11992533008257548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,128,0,1,fp8,fp8,0,0.0895306666692098
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,128,0,1,float16,fp8,0,0.09783466657002766
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,128,0,1,float16,float16,0,0.0930613378683726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,128,0,1,float16,fp8,0,0.09290132919947307
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,128,0,1,fp8,fp8,0,0.08493333061536153
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,128,0,1,float16,float16,0,0.08914666374524434
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,128,0,1,float16,fp8,0,0.09074667096138
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,128,0,1,fp8,fp8,0,0.08495466907819112
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,128,0,1,float16,float16,0,0.12444266676902771
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,128,0,1,float16,fp8,0,0.1260586678981781
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,128,0,1,float16,float16,0,0.0765119989713033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,128,0,1,fp8,fp8,0,0.11691733201344807
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,128,0,1,float16,fp8,0,0.07870933413505554
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,128,0,1,fp8,fp8,0,0.0730506678422292
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,128,0,1,float16,float16,0,0.07026133437951405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,128,0,1,float16,fp8,0,0.07141333321730296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,128,0,1,fp8,fp8,0,0.0664106657107671
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,128,0,1,float16,float16,0,0.0660693347454071
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,128,0,1,float16,fp8,0,0.06774400174617767
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,128,0,1,fp8,fp8,0,0.06233066817124685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,128,0,1,float16,float16,0,0.06479999919732411
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,128,0,1,fp8,fp8,0,0.061850666999816895
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,128,0,1,float16,fp8,0,0.06637333333492279
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,128,0,1,float16,float16,0,0.08303999900817871
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,128,0,1,float16,fp8,0,0.08566400408744812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,128,0,1,float16,float16,0,0.06232533355553945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,128,0,1,fp8,fp8,0,0.07841599980990092
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,128,0,1,float16,fp8,0,0.062352001667022705
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,128,0,1,float16,float16,0,0.056346664826075234
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,128,0,1,fp8,fp8,0,0.06021333237489065
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,128,0,1,float16,fp8,0,0.05635733405749003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,128,0,1,fp8,fp8,0,0.054101333022117615
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,128,0,1,float16,fp8,0,0.05427733560403188
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,128,0,1,float16,float16,0,0.05417599777380625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,128,0,1,fp8,fp8,0,0.04983466863632202
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,128,0,1,float16,float16,0,0.05212266743183136
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,128,0,1,fp8,fp8,0,0.047983999053637184
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,128,0,1,float16,fp8,0,0.05217066903909048
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,128,0,1,float16,float16,0,0.0853706697622935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,128,0,1,float16,fp8,0,0.08704533179601033
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,128,0,1,fp8,fp8,0,0.08292266726493835
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,128,0,1,float16,float16,0,0.052704001466433205
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,128,0,1,float16,fp8,0,0.05393599967161814
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,128,0,1,fp8,fp8,0,0.05195199946562449
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,128,0,1,float16,float16,0,0.047050664822260536
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,128,0,1,float16,fp8,0,0.04801600178082784
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,128,0,1,fp8,fp8,0,0.04576533536116282
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,128,0,1,float16,float16,0,0.04420800010363261
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,128,0,1,float16,fp8,0,0.04381866753101349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,128,0,1,fp8,fp8,0,0.0397119993964831
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,128,0,1,float16,float16,0,0.04205866654713949
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,128,0,1,float16,fp8,0,0.04156800111134847
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,128,0,1,fp8,fp8,0,0.038922667503356934
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,128,0,1,float16,float16,0,0.039877332746982574
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,128,0,1,float16,fp8,0,0.04182933270931244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,128,0,1,fp8,fp8,0,0.037903999288876854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,128,0,1,float16,float16,0,0.06195199986298879
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,128,0,1,float16,fp8,0,0.0639573335647583
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,128,0,1,fp8,fp8,0,0.06046933432420095
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,128,0,1,float16,float16,0,0.044256001710891724
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,128,0,1,float16,fp8,0,0.04409599800904592
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,128,0,1,fp8,fp8,0,0.04373333354791006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,128,0,1,float16,float16,0,0.03969600051641464
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,128,0,1,float16,fp8,0,0.03958400090535482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,128,0,1,fp8,fp8,0,0.03788266579310099
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,128,0,1,float16,float16,0,0.037952000896135964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,128,0,1,float16,fp8,0,0.03783999880154928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,128,0,1,fp8,fp8,0,0.035760000348091125
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,128,0,1,float16,float16,0,0.03575466573238373
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,128,0,1,float16,fp8,0,0.03589866558710734
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,128,0,1,fp8,fp8,0,0.03365866591533025
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,128,0,1,float16,float16,0,0.03566399961709976
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,128,0,1,float16,fp8,0,0.0340639998515447
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,128,0,1,fp8,fp8,0,0.031632001201311745
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,128,0,1,float16,float16,0,0.05855466425418854
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,128,0,1,float16,fp8,0,0.05856533348560333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,128,0,1,fp8,fp8,0,0.0602453351020813
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,128,0,1,float16,float16,0,0.03755733370780945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,128,0,1,float16,fp8,0,0.037658666570981346
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,128,0,1,fp8,fp8,0,0.037808001041412354
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,128,0,1,float16,float16,0,0.031925333042939506
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,128,0,1,float16,fp8,0,0.03363733241955439
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,128,0,1,fp8,fp8,0,0.03142933299144109
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,128,0,1,float16,float16,0,0.02938666691382726
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,128,0,1,float16,fp8,0,0.031514666974544525
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,128,0,1,float16,float16,0,0.029498666524887085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,128,0,1,fp8,fp8,0,0.029050665597120922
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,128,0,1,float16,fp8,0,0.02958933264017105
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,128,0,1,fp8,fp8,0,0.027834666272004444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,128,0,1,float16,float16,0,0.029301332930723827
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,128,0,1,float16,fp8,0,0.029626667499542236
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,128,0,1,fp8,fp8,0,0.02739199995994568
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,128,0,1,float16,float16,0,0.027717334528764088
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,128,0,1,float16,fp8,0,0.029440000653266907
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,128,0,1,fp8,fp8,0,0.026885333160559338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,128,0,1,float16,float16,0,0.05003733436266581
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,128,0,1,float16,fp8,0,0.05018133421738943
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,128,0,1,fp8,fp8,0,0.0526506652434667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,128,0,1,float16,float16,0,0.03175999969244003
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,128,0,1,float16,fp8,0,0.03166933357715607
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,128,0,1,fp8,fp8,0,0.0317546675602595
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,128,0,1,float16,float16,0,0.02754666656255722
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,128,0,1,float16,fp8,0,0.026943999032179516
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,128,0,1,fp8,fp8,0,0.027456000447273254
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,128,0,1,float16,float16,0,0.025557334224383037
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,128,0,1,float16,fp8,0,0.02369600037733714
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,128,0,1,float16,float16,0,0.023157333334287006
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,128,0,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,128,0,1,float16,fp8,0,0.023311999936898548
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,128,0,1,float16,float16,0,0.023152001202106476
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,128,0,1,fp8,fp8,0,0.021557333568731945
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,128,0,1,float16,fp8,0,0.023605334262053173
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,128,0,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,128,0,1,float16,float16,0,0.022485333184401195
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,128,0,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,128,0,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,128,0,1,float16,float16,0,0.023002666731675465
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,128,0,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,128,0,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,128,0,1,float16,float16,0,0.029504001140594482
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,128,0,1,fp8,fp8,0,0.027503999571005504
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,128,0,1,float16,float16,0,0.025114665428797405
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,128,0,1,float16,fp8,0,0.029904000461101532
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,128,0,1,float16,fp8,0,0.025199999411900837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,128,0,1,float16,float16,0,0.022687998910744984
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,128,0,1,fp8,fp8,0,0.024832000335057575
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,128,0,1,float16,fp8,0,0.02350933353106181
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,128,0,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,128,0,1,float16,float16,0,0.021482666333516438
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,128,0,1,float16,fp8,0,0.021253332495689392
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,128,0,1,float16,float16,0,0.019354666272799175
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,128,0,1,fp8,fp8,0,0.019632000476121902
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,128,0,1,float16,fp8,0,0.02161066730817159
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,128,0,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,128,0,1,float16,float16,0,0.02091199904680252
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,128,0,1,float16,fp8,0,0.020138667275508244
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,128,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,128,0,1,float16,float16,0,0.021183999876181286
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,128,0,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,128,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,128,0,1,float16,float16,0,0.02107200026512146
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,128,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,128,0,1,fp8,fp8,0,0.018378666291634243
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,128,0,1,float16,float16,0,0.021738665799299877
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,128,0,1,float16,fp8,0,0.02274666726589203
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,128,0,1,fp8,fp8,0,0.02109333376089732
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,128,0,1,float16,float16,0,0.01883200059334437
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,128,0,1,float16,fp8,0,0.019413333386182785
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,128,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,128,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,128,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,128,0,1,float16,fp8,0,0.017717332889636356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,128,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,128,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,128,0,1,float16,float16,0,0.017616000026464462
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,128,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,128,0,1,fp8,fp8,0,0.015967999895413715
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,128,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,128,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,128,0,1,float16,float16,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,128,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,128,0,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,128,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,128,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,128,0,1,fp8,fp8,0,0.015685333559910457
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,128,0,1,float16,fp8,0,0.017642666896184284
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,128,0,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,128,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,128,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,128,0,1,float16,fp8,0,0.01573333392540614
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,128,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,128,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,128,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,128,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,128,0,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,128,0,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,128,0,1,float16,float16,0,0.01670933390657107
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,128,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,128,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,128,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,128,0,1,fp8,fp8,0,0.01551466683546702
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,128,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,128,0,1,float16,fp8,0,0.017498667041460674
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,128,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,128,0,1,float16,fp8,0,0.017573333034912746
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,128,0,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,128,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,128,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,128,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,128,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,128,0,1,fp8,fp8,0,0.016751999656359356
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,128,0,1,float16,float16,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,128,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,128,0,1,fp8,fp8,0,0.016704000532627106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,128,0,1,fp8,fp8,0,0.01534933348496755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,128,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,128,0,1,float16,fp8,0,0.015935999651749928
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,128,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,128,0,1,float16,float16,0,0.01534933348496755
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,128,0,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,128,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,128,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,128,0,1,fp8,fp8,0,0.01618133361140887
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,128,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,128,0,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,128,0,1,float16,float16,0,0.01571200042963028
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,0,0.01781333362062772
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,128,0,1,fp8,fp8,0,0.017423999806245167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,0,0.019424000134070713
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,0,0.01810666670401891
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,128,0,1,fp8,fp8,0,0.01747200017174085
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,0,0.015909332782030106
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,0,0.015696000307798386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,128,0,1,fp8,fp8,0,0.016522667060295742
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,0,0.016778666526079178
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,0,0.015781333049138386
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,0,0.015552000453074774
TRTLLM,1.1.0,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,128,0,1,fp8,fp8,0,0.015376000354687372
